source: greenstone3/trunk/src/java/org/greenstone/gsdl3/LibraryServlet.java@ 14524

Last change on this file since 14524 was 14524, checked in by qq6, 17 years ago

the web page is redirected to an external link if "el=" is found in the query string

  • Property svn:keywords set to Author Date Id Revision
File size: 22.3 KB
Line 
1package org.greenstone.gsdl3;
2
3import org.greenstone.gsdl3.comms.*;
4import org.greenstone.gsdl3.core.*;
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.action.PageAction; // used to get the default action
7import org.w3c.dom.Document;
8import org.w3c.dom.Element;
9import org.w3c.dom.NodeList;
10import java.io.*;
11import javax.servlet.*;
12import javax.servlet.http.*;
13import java.util.Enumeration;
14import java.util.ArrayList;
15import java.util.HashMap;
16import java.io.File;
17import java.util.Hashtable;
18import org.apache.log4j.*;
19
20
21/** a servlet to serve the greenstone library - we are using servlets instead
22 * of cgi
23 * the init method is called only once - the first time the servlet classes
24 * are loaded. Each time a request comes in to the servlet, the session()
25 * method is called in a new thread (calls doGet/doPut etc)
26 * takes the a=p&p=home type args and builds a simple request to send to
27 * its receptionist, which returns a result in html, cos output=html
28 * is set in the request
29 *
30 * 18/Jul/07 xiao
31 * modify to make the cached parameters collection-specific.
32 * Most of the work is done in doGet(), except adding an inner class UserSessionCache.
33 *
34 * @see Receptionist
35 */
36public class LibraryServlet extends HttpServlet {
37
38 /** the receptionist to send messages to */
39 protected Receptionist recept=null;
40 /** the default language - is specified by setting a servlet param,
41 * otherwise DEFAULT_LANG is used*/
42 protected String default_lang= null;
43 /** The default default - used if a default lang is not specified
44 * in the servlet params */
45 protected final String DEFAULT_LANG = "en";
46 /** container Document to create XML Nodes */
47 protected Document doc=null;
48 /** a converter class to parse XML and create Docs */
49 protected XMLConverter converter=null;
50 /** the cgi stuff - the Receptionist can add new args to this
51 *
52 * its used by the servlet to determine what args to save */
53 protected GSParams params = null;
54
55 /** user id - new one per session. This doesn't work if session state is saved between restarts - this requires this value to be saved too. */
56 protected int next_user_id = 0;
57
58 /** a hash that contains all the active session IDs mapped to the cached items
59 * It is updated whenever the whole site or a particular collection is reconfigured
60 * using the command a=s&sa=c or a=s&sa=c&c=xxx
61 * It is in the form: sid -> (UserSessionCache object)
62 */
63 protected Hashtable session_ids_table = new Hashtable();
64 //this name is combined with the collection name and used for caching
65 protected String valid_site_name = "";
66
67 /** the maximum interval that the cached info remains in session_ids_table (in seconds)
68 * This is set in web.xml
69 */
70 protected int session_expiration = 1800;
71
72 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.LibraryServlet.class.getName());
73
74 /** initialise the servlet
75 */
76 public void init(ServletConfig config) throws ServletException {
77 // always call super.init;
78 super.init(config);
79 // disable preferences - does this work anyway??
80 //System.setProperty("java.util.prefs.PreferencesFactory", "org.greenstone.gsdl3.util.DisabledPreferencesFactory");
81
82
83 String library_name = config.getInitParameter(GSConstants.LIBRARY_NAME);
84 String gsdl3_home = config.getInitParameter(GSConstants.GSDL3_HOME);
85 String interface_name = config.getInitParameter(GSConstants.INTERFACE_NAME);
86 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
87 String sess_expire = config.getInitParameter(GSXML.SESSION_EXPIRATION);
88 if (sess_expire != null && !sess_expire.equals("")) {
89 this.session_expiration = Integer.parseInt(sess_expire);
90 }
91
92 if (library_name == null || interface_name ==null) {
93 // must have this
94 System.err.println("initialisation parameters not all set!");
95 System.err.println(" you must have libraryname and interfacename");
96 System.exit(1);
97 }
98
99 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
100 String remote_site_name = null;
101 String remote_site_type = null;
102 String remote_site_address = null;
103
104 if (site_name == null) {
105 // no site, try for communicator
106 remote_site_name = config.getInitParameter("remote_site_name");
107 remote_site_type = config.getInitParameter("remote_site_type");
108 remote_site_address = config.getInitParameter("remote_site_address");
109 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
110 System.err.println("initialisation paramters not all set!");
111 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
112 System.exit(1);
113 }
114 }
115 valid_site_name = (site_name != null)? site_name : remote_site_name;
116
117 if (this.default_lang == null) {
118 // choose english
119 this.default_lang = DEFAULT_LANG;
120 }
121
122 HashMap config_params = new HashMap();
123
124 config_params.put(GSConstants.LIBRARY_NAME, library_name);
125 config_params.put(GSConstants.INTERFACE_NAME, interface_name);
126 if (site_name != null) {
127 config_params.put(GSConstants.SITE_NAME, site_name);
128 }
129 this.converter = new XMLConverter();
130 this.doc = this.converter.newDOM();
131
132 // the receptionist -the servlet will talk to this
133 String recept_name = (String)config.getInitParameter("receptionist_class");
134 if (recept_name == null) {
135 this.recept = new DefaultReceptionist();
136 } else {
137 try {
138 this.recept = (Receptionist)Class.forName("org.greenstone.gsdl3.core."+recept_name).newInstance();
139 } catch (Exception e) { // cant use this new one, so use normal one
140 System.err.println("LibraryServlet configure exception when trying to use a new Receptionist "+recept_name+": "+e.getMessage());
141 e.printStackTrace();
142 this.recept = new DefaultReceptionist();
143 }
144 }
145 this.recept.setConfigParams(config_params);
146
147 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
148 if (site_name != null) {
149 String mr_name = (String)config.getInitParameter("messagerouter_class");
150 MessageRouter message_router = null;
151 if (mr_name == null) { // just use the normal MR
152 message_router = new MessageRouter();
153 } else { // try the specified one
154 try {
155 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
156 } catch (Exception e) { // cant use this new one, so use normal one
157 System.err.println("LibraryServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
158 e.printStackTrace();
159 message_router = new MessageRouter();
160 }
161 }
162
163 message_router.setSiteName(site_name);
164 message_router.setLibraryName(library_name);
165 message_router.configure();
166 this.recept.setMessageRouter(message_router);
167 } else {
168 // talking to a remote site, create a communicator
169 Communicator communicator = null;
170 // we need to create the XML to configure the communicator
171 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
172 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
173 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
174 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
175
176 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
177 communicator = new SOAPCommunicator();
178 } else {
179 System.err.println("LibraryServlet.init Error: invalid Communicator type: "+remote_site_type);
180 System.exit(1);
181 }
182
183 if (!communicator.configure(site_elem)) {
184 System.err.println("LibraryServlet.init Error: Couldn't configure communicator");
185 System.exit(1);
186 }
187 this.recept.setMessageRouter(communicator);
188 }
189
190 // the params arg thingy
191
192 String params_name = (String)config.getInitParameter("params_class");
193 if (params_name == null) {
194 this.params = new GSParams();
195 } else {
196 try {
197 this.params = (GSParams)Class.forName("org.greenstone.gsdl3.util."+params_name).newInstance();
198 } catch (Exception e) {
199 System.err.println("LibraryServlet configure exception when trying to use a new params thing "+params_name+": "+e.getMessage());
200 e.printStackTrace();
201 this.params = new GSParams();
202 }
203 }
204 // pass it to the receptionist
205 this.recept.setParams(this.params);
206 this.recept.configure();
207
208 }
209
210
211 private void logUsageInfo(HttpServletRequest request){
212 String usageInfo = "";
213
214 //session-info: get params stored in the session
215 HttpSession session = request.getSession(true);
216 Enumeration attributeNames = session.getAttributeNames();
217 while(attributeNames.hasMoreElements()) {
218 String name = (String)attributeNames.nextElement();
219 usageInfo +=name+"="+session.getAttribute(name)+" ";
220 }
221
222 //logged info = general-info + session-info
223 usageInfo =
224 request.getServletPath()+" "+ //serlvet
225 "["+request.getQueryString()+"]" +" "+ //the query string
226 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
227 request.getRemoteAddr()+" "+ //remote address
228 request.getRequestedSessionId()+" "+ //session id
229 request.getHeader("user-agent")+" "; //the remote brower info
230
231 logger.info(usageInfo);
232
233 }
234
235 public class UserSessionCache implements HttpSessionBindingListener {
236
237 String session_id = "";
238
239 /** a hash that maps the session ID to a hashtable that maps the coll_name to its parameters
240 * coll_name -> Hashtable (param_name -> param_value)
241 */
242 protected Hashtable coll_name_params_table = null;
243
244 public UserSessionCache(String id, Hashtable table) {
245 session_id = id;
246 coll_name_params_table = (table == null)? new Hashtable() : table;
247 }
248
249 protected void cleanupCache(String coll_name) {
250 if (coll_name_params_table.containsKey(coll_name)) {
251 coll_name_params_table.remove(coll_name);
252 }
253 }
254 protected Hashtable getParamsTable() {
255 return coll_name_params_table;
256 }
257 public void valueBound(HttpSessionBindingEvent event) {
258 // Do nothing
259 }
260
261 public void valueUnbound(HttpSessionBindingEvent event) {
262 if(session_ids_table.containsKey(session_id)) {
263 session_ids_table.remove(session_id);
264 }
265 }
266 public int tableSize() {
267 return (coll_name_params_table == null)? 0 : coll_name_params_table.size();
268 }
269 }
270
271 public void doGet (HttpServletRequest request,
272 HttpServletResponse response)
273 throws ServletException, IOException {
274 logUsageInfo (request);
275
276 String query_string = request.getQueryString();
277 if (query_string!=null){
278 String[] query_arr = query_string.split("&");
279 boolean redirect = false;
280 String href = null;
281 String rl = null;
282 for (int i=0;i<query_arr.length;i++){
283 if (query_arr[i].startsWith("el")){
284 redirect = true;
285 }else if(query_arr[i].startsWith("href")){
286 href = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
287 href = href.replaceAll("%2f", "/");
288 href = href.replaceAll("%7e", "~");
289 href = href.replaceAll("%3f", "?");
290 href = href.replaceAll("%3A", "\\:");
291 }else if(query_arr[i].startsWith("rl")){
292 rl = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
293 }
294 }
295 //if query_string contains "el=", the web page will be redirected to the external URl, otherwise a greenstone page with an external URL will be displayed
296 //"rl=0" this is an external link
297 //"rl=1" this is an internal link
298 if ((redirect) && (href != null) && (rl.equals("0"))){// This is an external link, the web page is re-directed to the external URL (&el=&rl=0&href="http://...")
299 response.setContentType("text/xml");
300 response.sendRedirect(href);
301 }
302 }
303 // Nested Diagnostic Configurator to identify the client for
304
305 HttpSession session = request.getSession (true);
306 String uid = (String)session.getAttribute (GSXML.USER_ID_ATT);
307 if (uid ==null) {
308 uid = ""+getNextUserId ();
309 session.setAttribute (GSXML.USER_ID_ATT, uid);
310 }
311 request.setCharacterEncoding ("UTF-8");
312 response.setContentType ("text/html;charset=UTF-8");
313 PrintWriter out = response.getWriter ();
314
315 String lang = request.getParameter (GSParams.LANGUAGE);
316 if (lang==null || lang.equals ("")) {
317 // try the session cached lang
318 lang = (String)session.getAttribute (GSParams.LANGUAGE);
319 if (lang==null || lang.equals ("")) {
320 // still not set, use the default
321 lang = this.default_lang;
322 }
323 }
324
325 // set the lang in the session
326 session.setAttribute (GSParams.LANGUAGE, lang);
327
328 String output = request.getParameter (GSParams.OUTPUT);
329 if (output==null || output.equals ("")) {
330 output = "html"; // uses html by default
331 }
332
333 // the request to the receptionist
334 Element xml_message = this.doc.createElement (GSXML.MESSAGE_ELEM);
335 Element xml_request = GSXML.createBasicRequest (this.doc, GSXML.REQUEST_TYPE_PAGE, "", lang, uid);
336 xml_request.setAttribute (GSXML.OUTPUT_ATT, output);
337 xml_message.appendChild (xml_request);
338
339 String action = request.getParameter (GSParams.ACTION);
340 String subaction = request.getParameter (GSParams.SUBACTION);
341 String collection = request.getParameter(GSParams.COLLECTION);
342
343 //specifically we clean up the cache session_ids_table if the two reconfigure command
344 //are issued: a=s&sa=c and a=s&sa=c&c=coll_name, in which case there is no caching action to be taken
345 boolean should_cache = true;
346 if(action != null && action.equals(GSParams.SYSTEM)
347 && subaction != null && subaction.equals(GSParams.CONFIGURE)) {
348 if (collection == null || collection.equals("")) {
349 //user reconfiugred the whole site, clean up all cached info
350 //logger.info("clear cache for the whole site.");
351 session_ids_table = new Hashtable();
352
353 } else {
354 //clean up all cache info related to the collection
355 //logger.info("clear cache for collection: " + collection);
356 ArrayList cache_list = new ArrayList(session_ids_table.values());
357 for (int i=0; i<cache_list.size(); i++) {
358 UserSessionCache cache = (UserSessionCache)cache_list.get(i);
359 cache.cleanupCache(collection);
360 }
361 }
362 should_cache = false;
363 }
364
365 // logger.info("should_cache= " + should_cache);
366 //clear the collection-specific cache in the session, since we have no way to know whether this session is
367 //about the same collection as the last session or not.
368 Enumeration attributeNames = session.getAttributeNames();
369 while(attributeNames.hasMoreElements()) {
370 String name = (String)attributeNames.nextElement();
371 if (!name.equals (GSXML.USER_SESSION_CACHE_ATT)
372 && !name.equals (GSParams.LANGUAGE)
373 && !name.equals (GSXML.USER_ID_ATT)) {
374 session.removeAttribute(name);
375 }
376 }
377
378 UserSessionCache session_cache = null;
379 Hashtable param_table = null;
380 Hashtable table = null;
381 String sid = session.getId();
382 if (should_cache == true && collection != null && !collection.equals("")) {
383 String key_str = valid_site_name + collection;
384 if (session_ids_table.containsKey(sid)) {
385 session_cache = (UserSessionCache)session_ids_table.get(sid);
386 param_table = session_cache.getParamsTable();
387 logger.info("collections in table: " + tableToString(param_table));
388 if (param_table.containsKey(key_str)) {
389 //logger.info("existing table: " + collection);
390 table = (Hashtable)param_table.get(key_str);
391 } else {
392 table = new Hashtable();
393 param_table.put(key_str, table);
394 //logger.info("new table: " + collection);
395 }
396 } else {
397 param_table = new Hashtable();
398 table = new Hashtable();
399 param_table.put(key_str, table);
400 session_cache = new UserSessionCache(sid, param_table);
401 session_ids_table.put(sid, session_cache);
402 //logger.info("new session id");
403 }
404 }
405
406 if (action==null || action.equals ("")) {
407 // should we do all the following stuff if using default page?
408 // display the home page - the default page
409 action = "p";
410 subaction = PageAction.HOME_PAGE;
411
412 xml_request.setAttribute (GSXML.ACTION_ATT, action);
413 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
414
415 } else {
416
417 xml_request.setAttribute (GSXML.ACTION_ATT, action);
418 if (subaction != null) {
419 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
420 }
421
422 // create the param list for the greenstone request - includes
423 // the params from the current request and any others from the saved session
424 Element xml_param_list = this.doc.createElement (GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
425 xml_request.appendChild (xml_param_list);
426
427 Enumeration params = request.getParameterNames ();
428 while(params.hasMoreElements ()) {
429 String name = (String)params.nextElement ();
430 if (!name.equals (GSParams.ACTION)
431 && !name.equals (GSParams.SUBACTION)
432 && !name.equals (GSParams.LANGUAGE)
433 && !name.equals (GSParams.OUTPUT)) {// we have already dealt with these
434 String value="";
435 String [] values = request.getParameterValues (name);
436 value = values[0];
437 if (values.length > 1) {
438 for (int i=1; i< values.length; i++) {
439 value += ","+values[i];
440 }
441 }
442 // either add it to the param list straight away, or save it to the session and add it later
443 if (this.params.shouldSave (name)) {
444 if (table != null) {
445 table.put(name, value);
446 }
447 } else {
448 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
449 param.setAttribute (GSXML.NAME_ATT, name);
450 param.setAttribute (GSXML.VALUE_ATT, GSXML.xmlSafe (value));
451 xml_param_list.appendChild (param);
452 }
453 }
454 }
455 //put everything in the table into the session
456 if (table != null) {
457 Enumeration keys = table.keys ();
458 while(keys.hasMoreElements ()) {
459 String name = (String)keys.nextElement();
460 session.setAttribute(name, (String)table.get(name));
461 }
462 }
463
464 // put in all the params from the session cache
465 params = session.getAttributeNames ();
466 while(params.hasMoreElements ()) {
467 String name = (String)params.nextElement ();
468
469 if ( !name.equals (GSXML.USER_SESSION_CACHE_ATT)
470 && !name.equals (GSParams.LANGUAGE)
471 && !name.equals (GSXML.USER_ID_ATT)) {
472 // lang and uid are stored but we dont want it in the param list cos its already in the request
473 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
474 param.setAttribute (GSXML.NAME_ATT, name);
475 String value = GSXML.xmlSafe ((String)session.getAttribute (name));
476 // ugly hack to undo : escaping
477 value = value.replaceAll ("%3A", "\\:");
478 param.setAttribute (GSXML.VALUE_ATT,value);
479 xml_param_list.appendChild (param);
480 }
481 }
482
483 //now set the UserSessionCache object into the session with its life expectancy
484 if (session_cache != null) {
485 session.setAttribute(GSXML.USER_SESSION_CACHE_ATT, session_cache);
486 session.setMaxInactiveInterval(session_expiration);
487 }
488 }
489
490 if (!output.equals ("html")) {
491 response.setContentType ("text/xml"); // for now use text
492 }
493
494 //GSXML.printXMLNode(xml_message);
495
496 Element xml_result = this.recept.process (xml_message);
497 encodeURLs (xml_result, response);
498 out.println (this.converter.getPrettyString (xml_result));
499
500 displaySize(session_ids_table);
501 }
502 //a debugging method
503 private void displaySize(Hashtable table) {
504 if(table == null) {
505 logger.info("cached table is null");
506 return;
507 }
508 if (table.size() == 0) {
509 logger.info("cached table size is zero");
510 return;
511 }
512 int num_cached_coll = 0;
513 ArrayList cache_list = new ArrayList(table.values());
514 for (int i=0; i<cache_list.size(); i++) {
515 num_cached_coll += ((UserSessionCache)cache_list.get(i)).tableSize();
516 }
517 logger.info("Number of sessions : total number of cached collection info = " + table.size() + " : " + num_cached_coll);
518 }
519 /** merely a debugging method! */
520 private String tableToString(Hashtable table) {
521 String str = "";
522 Enumeration keys = table.keys ();
523 while(keys.hasMoreElements ()) {
524 String name = (String)keys.nextElement();
525 str += name + ", ";
526 }
527 return str;
528 }
529
530 /** this goes through each URL and adds in a session id if needed--
531 * its needed if the browser doesn't accept cookies
532 * also escapes things if needed
533 */
534 protected void encodeURLs(Element data, HttpServletResponse response) {
535
536 if (data == null) {
537 return;
538 }
539 // get all the <a> elements
540 NodeList hrefs = data.getElementsByTagName("a");
541 for (int i=0; i<hrefs.getLength(); i++) {
542 Element a = (Element)hrefs.item(i);
543 // ugly hack to get rid of : in the args - interferes with session handling
544 String href = a.getAttribute("href");
545 if (!href.equals("")) {
546 if (href.indexOf("?")!=-1) {
547 String[] parts = href.split("\\?", -1);
548 parts[1]=parts[1].replaceAll(":", "%3A");
549 href = parts[0]+"?"+parts[1];
550 }
551 a.setAttribute("href", response.encodeURL(href));
552 }
553 }
554
555 // now find any submit bits - get all the <form> elements
556 NodeList forms = data.getElementsByTagName("form");
557 for (int i=0; i<forms.getLength(); i++) {
558 Element form = (Element)forms.item(i);
559 form.setAttribute("action", response.encodeURL(form.getAttribute("action")));
560 }
561 // are these the only cases where URLs occur??
562 // we should only do this for greenstone urls?
563
564 }
565
566 synchronized protected int getNextUserId() {
567 next_user_id++;
568 return next_user_id;
569 }
570
571 public void doPost(HttpServletRequest request,
572 HttpServletResponse response)
573 throws ServletException, IOException {
574 doGet(request,response);
575
576 }
577}
Note: See TracBrowser for help on using the repository browser.