source: greenstone3/trunk/src/java/org/greenstone/gsdl3/LibraryServlet.java@ 14551

Last change on this file since 14551 was 14551, checked in by qq6, 17 years ago

go directly to the external page if el=direct

  • Property svn:keywords set to Author Date Id Revision
File size: 22.3 KB
Line 
1package org.greenstone.gsdl3;
2
3import org.greenstone.gsdl3.comms.*;
4import org.greenstone.gsdl3.core.*;
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.action.PageAction; // used to get the default action
7import org.w3c.dom.Document;
8import org.w3c.dom.Element;
9import org.w3c.dom.NodeList;
10import java.io.*;
11import javax.servlet.*;
12import javax.servlet.http.*;
13import java.util.Enumeration;
14import java.util.ArrayList;
15import java.util.HashMap;
16import java.io.File;
17import java.util.Hashtable;
18import org.apache.log4j.*;
19
20
21/** a servlet to serve the greenstone library - we are using servlets instead
22 * of cgi
23 * The init method is called only once - the first time the servlet classes
24 * are loaded. Each time a request comes in to the servlet, the service()
25 * method is called in a new thread (it calls doGet/doPost etc).
26 * The servlet takes the a=p&sa=home type args and builds a simple request to send to
27 * its receptionist, which returns the result as html by default, because
28 * output=html is set in the request unless another output is asked for.
29 *
30 * 18/Jul/07 xiao
31 * modify to make the cached parameters collection-specific.
32 * Most of the work is done in doGet(), except adding an inner class UserSessionCache.
33 *
34 * @see Receptionist
35 */
36public class LibraryServlet extends HttpServlet {
37
38 /** the receptionist that every request message is handed to - created and configured in init() */
39 protected Receptionist recept=null;
40 /** the default language - read from the GSConstants.DEFAULT_LANG servlet init param;
41 * when that param is missing, DEFAULT_LANG is used instead */
42 protected String default_lang= null;
43 /** The fallback default - used if a default lang is not specified
44 * in the servlet params */
45 protected final String DEFAULT_LANG = "en";
46 /** container Document used to create the XML nodes of requests (created in init()) */
47 protected Document doc=null;
48 /** a converter class - used here to create the DOM and to turn the result into a string */
49 protected XMLConverter converter=null;
50 /** the cgi parameter definitions - the Receptionist can add new args to this
51 *
52 * it is used by the servlet (via shouldSave) to determine which args to cache rather than pass straight on */
53 protected GSParams params = null;
54
55 /** user id counter - a new id is handed out per session by getNextUserId(). This doesn't work if session state is saved between restarts - this requires this value to be saved too. */
56 protected int next_user_id = 0;
57
58 /** a hash that contains all the active session IDs mapped to the cached items
59 * It is replaced by an empty table when the whole site is reconfigured (a=s&sa=c) and
60 * cleaned per collection when a single collection is reconfigured (a=s&sa=c&c=xxx)
61 * It is in the form: sid -> (UserSessionCache object)
62 */
63 protected Hashtable session_ids_table = new Hashtable();
64 //the site name (or the remote site name if no local site is set); it is prepended to the collection name to form the cache key
65 protected String valid_site_name = "";
66
67 /** the session inactivity timeout (in seconds) handed to HttpSession.setMaxInactiveInterval in doGet()
68 * It is read from the GSXML.SESSION_EXPIRATION servlet init param (web.xml); the default is 1800
69 */
70 protected int session_expiration = 1800;
71
72 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.LibraryServlet.class.getName());
73
74 /** initialise the servlet
75 */
76 public void init(ServletConfig config) throws ServletException {
77 // always call super.init;
78 super.init(config);
79 // disable preferences - does this work anyway??
80 //System.setProperty("java.util.prefs.PreferencesFactory", "org.greenstone.gsdl3.util.DisabledPreferencesFactory");
81
82
83 String library_name = config.getInitParameter(GSConstants.LIBRARY_NAME);
84 String gsdl3_home = config.getInitParameter(GSConstants.GSDL3_HOME);
85 String interface_name = config.getInitParameter(GSConstants.INTERFACE_NAME);
86 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
87 String sess_expire = config.getInitParameter(GSXML.SESSION_EXPIRATION);
88 if (sess_expire != null && !sess_expire.equals("")) {
89 this.session_expiration = Integer.parseInt(sess_expire);
90 }
91
92 if (library_name == null || interface_name ==null) {
93 // must have this
94 System.err.println("initialisation parameters not all set!");
95 System.err.println(" you must have libraryname and interfacename");
96 System.exit(1);
97 }
98
99 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
100 String remote_site_name = null;
101 String remote_site_type = null;
102 String remote_site_address = null;
103
104 if (site_name == null) {
105 // no site, try for communicator
106 remote_site_name = config.getInitParameter("remote_site_name");
107 remote_site_type = config.getInitParameter("remote_site_type");
108 remote_site_address = config.getInitParameter("remote_site_address");
109 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
110 System.err.println("initialisation paramters not all set!");
111 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
112 System.exit(1);
113 }
114 }
115 valid_site_name = (site_name != null)? site_name : remote_site_name;
116
117 if (this.default_lang == null) {
118 // choose english
119 this.default_lang = DEFAULT_LANG;
120 }
121
122 HashMap config_params = new HashMap();
123
124 config_params.put(GSConstants.LIBRARY_NAME, library_name);
125 config_params.put(GSConstants.INTERFACE_NAME, interface_name);
126 if (site_name != null) {
127 config_params.put(GSConstants.SITE_NAME, site_name);
128 }
129 this.converter = new XMLConverter();
130 this.doc = this.converter.newDOM();
131
132 // the receptionist -the servlet will talk to this
133 String recept_name = (String)config.getInitParameter("receptionist_class");
134 if (recept_name == null) {
135 this.recept = new DefaultReceptionist();
136 } else {
137 try {
138 this.recept = (Receptionist)Class.forName("org.greenstone.gsdl3.core."+recept_name).newInstance();
139 } catch (Exception e) { // cant use this new one, so use normal one
140 System.err.println("LibraryServlet configure exception when trying to use a new Receptionist "+recept_name+": "+e.getMessage());
141 e.printStackTrace();
142 this.recept = new DefaultReceptionist();
143 }
144 }
145 this.recept.setConfigParams(config_params);
146
147 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
148 if (site_name != null) {
149 String mr_name = (String)config.getInitParameter("messagerouter_class");
150 MessageRouter message_router = null;
151 if (mr_name == null) { // just use the normal MR
152 message_router = new MessageRouter();
153 } else { // try the specified one
154 try {
155 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
156 } catch (Exception e) { // cant use this new one, so use normal one
157 System.err.println("LibraryServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
158 e.printStackTrace();
159 message_router = new MessageRouter();
160 }
161 }
162
163 message_router.setSiteName(site_name);
164 message_router.setLibraryName(library_name);
165 message_router.configure();
166 this.recept.setMessageRouter(message_router);
167 } else {
168 // talking to a remote site, create a communicator
169 Communicator communicator = null;
170 // we need to create the XML to configure the communicator
171 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
172 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
173 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
174 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
175
176 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
177 communicator = new SOAPCommunicator();
178 } else {
179 System.err.println("LibraryServlet.init Error: invalid Communicator type: "+remote_site_type);
180 System.exit(1);
181 }
182
183 if (!communicator.configure(site_elem)) {
184 System.err.println("LibraryServlet.init Error: Couldn't configure communicator");
185 System.exit(1);
186 }
187 this.recept.setMessageRouter(communicator);
188 }
189
190 // the params arg thingy
191
192 String params_name = (String)config.getInitParameter("params_class");
193 if (params_name == null) {
194 this.params = new GSParams();
195 } else {
196 try {
197 this.params = (GSParams)Class.forName("org.greenstone.gsdl3.util."+params_name).newInstance();
198 } catch (Exception e) {
199 System.err.println("LibraryServlet configure exception when trying to use a new params thing "+params_name+": "+e.getMessage());
200 e.printStackTrace();
201 this.params = new GSParams();
202 }
203 }
204 // pass it to the receptionist
205 this.recept.setParams(this.params);
206 this.recept.configure();
207
208 }
209
210
211 private void logUsageInfo(HttpServletRequest request){
212 String usageInfo = "";
213
214 //session-info: get params stored in the session
215 HttpSession session = request.getSession(true);
216 Enumeration attributeNames = session.getAttributeNames();
217 while(attributeNames.hasMoreElements()) {
218 String name = (String)attributeNames.nextElement();
219 usageInfo +=name+"="+session.getAttribute(name)+" ";
220 }
221
222 //logged info = general-info + session-info
223 usageInfo =
224 request.getServletPath()+" "+ //serlvet
225 "["+request.getQueryString()+"]" +" "+ //the query string
226 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
227 request.getRemoteAddr()+" "+ //remote address
228 request.getRequestedSessionId()+" "+ //session id
229 request.getHeader("user-agent")+" "; //the remote brower info
230
231 logger.info(usageInfo);
232
233 }
234
235 public class UserSessionCache implements HttpSessionBindingListener {
236
237 String session_id = "";
238
239 /** a hash that maps the session ID to a hashtable that maps the coll_name to its parameters
240 * coll_name -> Hashtable (param_name -> param_value)
241 */
242 protected Hashtable coll_name_params_table = null;
243
244 public UserSessionCache(String id, Hashtable table) {
245 session_id = id;
246 coll_name_params_table = (table == null)? new Hashtable() : table;
247 }
248
249 protected void cleanupCache(String coll_name) {
250 if (coll_name_params_table.containsKey(coll_name)) {
251 coll_name_params_table.remove(coll_name);
252 }
253 }
254 protected Hashtable getParamsTable() {
255 return coll_name_params_table;
256 }
257 public void valueBound(HttpSessionBindingEvent event) {
258 // Do nothing
259 }
260
261 public void valueUnbound(HttpSessionBindingEvent event) {
262 if(session_ids_table.containsKey(session_id)) {
263 session_ids_table.remove(session_id);
264 }
265 }
266 public int tableSize() {
267 return (coll_name_params_table == null)? 0 : coll_name_params_table.size();
268 }
269 }
270
271 public void doGet (HttpServletRequest request,
272 HttpServletResponse response)
273 throws ServletException, IOException {
274 logUsageInfo (request);
275
276 String query_string = request.getQueryString();
277 if (query_string!=null){
278 String[] query_arr = query_string.split("&");
279 boolean redirect = false;
280 String href = null;
281 String rl = null;
282 for (int i=0;i<query_arr.length;i++){
283 if (query_arr[i].startsWith("el")){
284 if (query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length()).equals("direct")){
285 redirect = true;
286 }
287 }else if(query_arr[i].startsWith("href")){
288 href = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
289 href = href.replaceAll("%2f", "/");
290 href = href.replaceAll("%7e", "~");
291 href = href.replaceAll("%3f", "?");
292 href = href.replaceAll("%3A", "\\:");
293 }else if(query_arr[i].startsWith("rl")){
294 rl = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
295 }
296 }
297 //if query_string contains "el=", the web page will be redirected to the external URl, otherwise a greenstone page with an external URL will be displayed
298 //"rl=0" this is an external link
299 //"rl=1" this is an internal link
300 if ((redirect) && (href != null) && (rl.equals("0"))){// This is an external link, the web page is re-directed to the external URL (&el=&rl=0&href="http://...")
301 response.setContentType("text/xml");
302 response.sendRedirect(href);
303 }
304 }
305 // Nested Diagnostic Configurator to identify the client for
306
307 HttpSession session = request.getSession (true);
308 session.setMaxInactiveInterval(session_expiration);
309 String uid = (String)session.getAttribute (GSXML.USER_ID_ATT);
310 if (uid ==null) {
311 uid = ""+getNextUserId ();
312 session.setAttribute (GSXML.USER_ID_ATT, uid);
313 }
314 request.setCharacterEncoding ("UTF-8");
315 response.setContentType ("text/html;charset=UTF-8");
316 PrintWriter out = response.getWriter ();
317
318 String lang = request.getParameter (GSParams.LANGUAGE);
319 if (lang==null || lang.equals ("")) {
320 // try the session cached lang
321 lang = (String)session.getAttribute (GSParams.LANGUAGE);
322 if (lang==null || lang.equals ("")) {
323 // still not set, use the default
324 lang = this.default_lang;
325 }
326 }
327
328 // set the lang in the session
329 session.setAttribute (GSParams.LANGUAGE, lang);
330
331 String output = request.getParameter (GSParams.OUTPUT);
332 if (output==null || output.equals ("")) {
333 output = "html"; // uses html by default
334 }
335
336 // the request to the receptionist
337 Element xml_message = this.doc.createElement (GSXML.MESSAGE_ELEM);
338 Element xml_request = GSXML.createBasicRequest (this.doc, GSXML.REQUEST_TYPE_PAGE, "", lang, uid);
339 xml_request.setAttribute (GSXML.OUTPUT_ATT, output);
340 xml_message.appendChild (xml_request);
341
342 String action = request.getParameter (GSParams.ACTION);
343 String subaction = request.getParameter (GSParams.SUBACTION);
344 String collection = request.getParameter(GSParams.COLLECTION);
345
346 //specifically we clean up the cache session_ids_table if the two reconfigure command
347 //are issued: a=s&sa=c and a=s&sa=c&c=coll_name, in which case there is no caching action to be taken
348 boolean should_cache = true;
349 if(action != null && action.equals(GSParams.SYSTEM)
350 && subaction != null && subaction.equals(GSParams.CONFIGURE)) {
351 if (collection == null || collection.equals("")) {
352 //user reconfiugred the whole site, clean up all cached info
353 //logger.info("clear cache for the whole site.");
354 session_ids_table = new Hashtable();
355 session.removeAttribute(GSXML.USER_SESSION_CACHE_ATT);
356 } else {
357 //clean up all cache info related to the collection
358 //logger.info("clear cache for collection: " + collection);
359 ArrayList cache_list = new ArrayList(session_ids_table.values());
360 for (int i=0; i<cache_list.size(); i++) {
361 UserSessionCache cache = (UserSessionCache)cache_list.get(i);
362 cache.cleanupCache(collection);
363 }
364 }
365 should_cache = false;
366 }
367
368 // logger.info("should_cache= " + should_cache);
369 //clear the collection-specific cache in the session, since we have no way to know whether this session is
370 //about the same collection as the last session or not.
371 Enumeration attributeNames = session.getAttributeNames();
372 while(attributeNames.hasMoreElements()) {
373 String name = (String)attributeNames.nextElement();
374 if (!name.equals (GSXML.USER_SESSION_CACHE_ATT)
375 && !name.equals (GSParams.LANGUAGE)
376 && !name.equals (GSXML.USER_ID_ATT)) {
377 session.removeAttribute(name);
378 }
379 }
380
381 UserSessionCache session_cache = null;
382 Hashtable param_table = null;
383 Hashtable table = null;
384 String sid = session.getId();
385 if (should_cache == true && collection != null && !collection.equals("")) {
386 String key_str = valid_site_name + collection;
387 if (session_ids_table.containsKey(sid)) {
388 session_cache = (UserSessionCache)session_ids_table.get(sid);
389 param_table = session_cache.getParamsTable();
390 logger.info("collections in table: " + tableToString(param_table));
391 if (param_table.containsKey(key_str)) {
392 //logger.info("existing table: " + collection);
393 table = (Hashtable)param_table.get(key_str);
394 } else {
395 table = new Hashtable();
396 param_table.put(key_str, table);
397 //logger.info("new table: " + collection);
398 }
399 } else {
400 param_table = new Hashtable();
401 table = new Hashtable();
402 param_table.put(key_str, table);
403 session_cache = new UserSessionCache(sid, param_table);
404 session_ids_table.put(sid, session_cache);
405 session.setAttribute(GSXML.USER_SESSION_CACHE_ATT, session_cache);
406 //logger.info("new session id");
407 }
408 }
409
410 if (action==null || action.equals ("")) {
411 // should we do all the following stuff if using default page?
412 // display the home page - the default page
413 action = "p";
414 subaction = PageAction.HOME_PAGE;
415
416 xml_request.setAttribute (GSXML.ACTION_ATT, action);
417 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
418
419 } else {
420
421 xml_request.setAttribute (GSXML.ACTION_ATT, action);
422 if (subaction != null) {
423 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
424 }
425
426 // create the param list for the greenstone request - includes
427 // the params from the current request and any others from the saved session
428 Element xml_param_list = this.doc.createElement (GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
429 xml_request.appendChild (xml_param_list);
430
431 Enumeration params = request.getParameterNames ();
432 while(params.hasMoreElements ()) {
433 String name = (String)params.nextElement ();
434 if (!name.equals (GSParams.ACTION)
435 && !name.equals (GSParams.SUBACTION)
436 && !name.equals (GSParams.LANGUAGE)
437 && !name.equals (GSParams.OUTPUT)) {// we have already dealt with these
438 String value="";
439 String [] values = request.getParameterValues (name);
440 value = values[0];
441 if (values.length > 1) {
442 for (int i=1; i< values.length; i++) {
443 value += ","+values[i];
444 }
445 }
446 // either add it to the param list straight away, or save it to the session and add it later
447 if (this.params.shouldSave (name)) {
448 if (table != null) {
449 table.put(name, value);
450 }
451 } else {
452 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
453 param.setAttribute (GSXML.NAME_ATT, name);
454 param.setAttribute (GSXML.VALUE_ATT, GSXML.xmlSafe (value));
455 xml_param_list.appendChild (param);
456 }
457 }
458 }
459 //put everything in the table into the session
460 if (table != null) {
461 Enumeration keys = table.keys ();
462 while(keys.hasMoreElements ()) {
463 String name = (String)keys.nextElement();
464 session.setAttribute(name, (String)table.get(name));
465 }
466 }
467
468 // put in all the params from the session cache
469 params = session.getAttributeNames ();
470 while(params.hasMoreElements ()) {
471 String name = (String)params.nextElement ();
472
473 if ( !name.equals (GSXML.USER_SESSION_CACHE_ATT)
474 && !name.equals (GSParams.LANGUAGE)
475 && !name.equals (GSXML.USER_ID_ATT)) {
476 // lang and uid are stored but we dont want it in the param list cos its already in the request
477 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
478 param.setAttribute (GSXML.NAME_ATT, name);
479 String value = GSXML.xmlSafe ((String)session.getAttribute (name));
480 // ugly hack to undo : escaping
481 value = value.replaceAll ("%3A", "\\:");
482 param.setAttribute (GSXML.VALUE_ATT,value);
483 xml_param_list.appendChild (param);
484 }
485 }
486 }
487
488 if (!output.equals ("html")) {
489 response.setContentType ("text/xml"); // for now use text
490 }
491
492 //GSXML.printXMLNode(xml_message);
493
494 Element xml_result = this.recept.process (xml_message);
495 encodeURLs (xml_result, response);
496 out.println (this.converter.getPrettyString (xml_result));
497
498 displaySize(session_ids_table);
499 }
500 //a debugging method
501 private void displaySize(Hashtable table) {
502 if(table == null) {
503 logger.info("cached table is null");
504 return;
505 }
506 if (table.size() == 0) {
507 logger.info("cached table size is zero");
508 return;
509 }
510 int num_cached_coll = 0;
511 ArrayList cache_list = new ArrayList(table.values());
512 for (int i=0; i<cache_list.size(); i++) {
513 num_cached_coll += ((UserSessionCache)cache_list.get(i)).tableSize();
514 }
515 logger.info("Number of sessions : total number of cached collection info = " + table.size() + " : " + num_cached_coll);
516 }
517 /** merely a debugging method! */
518 private String tableToString(Hashtable table) {
519 String str = "";
520 Enumeration keys = table.keys ();
521 while(keys.hasMoreElements ()) {
522 String name = (String)keys.nextElement();
523 str += name + ", ";
524 }
525 return str;
526 }
527
528 /** this goes through each URL and adds in a session id if needed--
529 * its needed if the browser doesn't accept cookies
530 * also escapes things if needed
531 */
532 protected void encodeURLs(Element data, HttpServletResponse response) {
533
534 if (data == null) {
535 return;
536 }
537 // get all the <a> elements
538 NodeList hrefs = data.getElementsByTagName("a");
539 for (int i=0; i<hrefs.getLength(); i++) {
540 Element a = (Element)hrefs.item(i);
541 // ugly hack to get rid of : in the args - interferes with session handling
542 String href = a.getAttribute("href");
543 if (!href.equals("")) {
544 if (href.indexOf("?")!=-1) {
545 String[] parts = href.split("\\?", -1);
546 parts[1]=parts[1].replaceAll(":", "%3A");
547 href = parts[0]+"?"+parts[1];
548 }
549 a.setAttribute("href", response.encodeURL(href));
550 }
551 }
552
553 // now find any submit bits - get all the <form> elements
554 NodeList forms = data.getElementsByTagName("form");
555 for (int i=0; i<forms.getLength(); i++) {
556 Element form = (Element)forms.item(i);
557 form.setAttribute("action", response.encodeURL(form.getAttribute("action")));
558 }
559 // are these the only cases where URLs occur??
560 // we should only do this for greenstone urls?
561
562 }
563
564 synchronized protected int getNextUserId() {
565 next_user_id++;
566 return next_user_id;
567 }
568
569 public void doPost(HttpServletRequest request,
570 HttpServletResponse response)
571 throws ServletException, IOException {
572 doGet(request,response);
573
574 }
575}
Note: See TracBrowser for help on using the repository browser.