source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/core/URLFilter.java@ 32988

Last change on this file since 32988 was 32988, checked in by kjdon, 5 years ago
  1. Added code to match document/HASHxxx/print - goes to a=d&sa=print, for a print version of the doc. 2. Added code for humanverify. If humanverify is set for a collection, then any links to source docs (and currently any assocfiles - need to fix that) will go via a verify page, where the user has to accept the terms and conditions and do a recaptcha.
  • Property svn:executable set to *
File size: 17.1 KB
Line 
1package org.greenstone.gsdl3.core;
2
3import java.io.File;
4import java.io.IOException;
5import java.util.ArrayList;
6import java.util.Arrays;
7import java.util.HashMap;
8import java.util.Map;
9
10import javax.servlet.Filter;
11import javax.servlet.FilterChain;
12import javax.servlet.FilterConfig;
13import javax.servlet.ServletContext;
14import javax.servlet.ServletException;
15import javax.servlet.ServletOutputStream;
16import javax.servlet.ServletRequest;
17import javax.servlet.ServletResponse;
18import javax.servlet.http.HttpSession;
19import javax.servlet.http.HttpServletRequest;
20import javax.servlet.http.HttpServletRequestWrapper;
21import javax.servlet.http.HttpServletResponse;
22
23import org.apache.commons.io.FileUtils;
24import org.apache.commons.lang3.StringUtils;
25
26import org.apache.log4j.Logger;
27import org.greenstone.gsdl3.util.GSParams;
28import org.greenstone.gsdl3.util.GSXML;
29import org.greenstone.gsdl3.util.UserContext;
30import org.greenstone.gsdl3.util.XMLConverter;
31import org.greenstone.gsdl3.service.Authentication;
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36public class URLFilter implements Filter
37{
38 private FilterConfig _filterConfig = null;
39 private static Logger _logger = Logger.getLogger(org.greenstone.gsdl3.core.URLFilter.class.getName());
40
41 //Restricted URLs
42 protected static final String SITECONFIG_URL = "sites/[^/]+/siteConfig.xml";
43 protected static final String USERS_DB_URL = "etc/usersDB/.*";
44 protected static final ArrayList<String> _restrictedURLs;
45 static
46 {
47 ArrayList<String> restrictedURLs = new ArrayList<String>();
48 restrictedURLs.add(SITECONFIG_URL);
49 restrictedURLs.add(USERS_DB_URL);
50 _restrictedURLs = restrictedURLs;
51 }
52
53 //Constants
54 protected static final String DOCUMENT_PATH = "document";
55 protected static final String COLLECTION_PATH = "collection";
56 protected static final String GROUP_PATH = "group";
57 protected static final String PAGE_PATH = "page";
58 protected static final String SYSTEM_PATH = "system";
59 protected static final String BROWSE_PATH = "browse";
60 protected static final String SEARCH_PATH = "search";
61
62 protected static final String METADATA_RETRIEVAL_SERVICE = "DocumentMetadataRetrieve";
63 protected static final String ASSOCIATED_FILE_PATH = "/index/assoc/";
64 protected static final String COLLECTION_FILE_PATH = "/collect/";
65 protected static final String INTERFACE_PATH = "/interfaces/";
66
67 protected static final String SYSTEM_SUBACTION_CONFIGURE = "configure";
68 protected static final String SYSTEM_SUBACTION_RECONFIGURE = "reconfigure";
69 protected static final String SYSTEM_SUBACTION_ACTIVATE = "activate";
70 protected static final String SYSTEM_SUBACTION_DEACTIVATE = "deactivate";
71
72 public void init(FilterConfig filterConfig) throws ServletException
73 {
74 this._filterConfig = filterConfig;
75 }
76
77 public void destroy()
78 {
79 this._filterConfig = null;
80 }
81
82 @SuppressWarnings("deprecation")
83 public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException
84 {
85 if (request instanceof HttpServletRequest)
86 {
87 HttpServletRequest hRequest = ((HttpServletRequest) request);
88 HttpSession hSession = hRequest.getSession();
89 ServletContext context = hSession.getServletContext();
90
91 GSHttpServletRequestWrapper gRequest = new GSHttpServletRequestWrapper(hRequest);
92
93 // this is the part before the ?
94 String url = hRequest.getRequestURI().toString();
95
96 if (isURLRestricted(url))
97 {
98 response.getWriter().println("Access to this page is forbidden.");
99 return;
100 }
101
102 //If the user is trying to access a collection file we need to run a security check
103 if (url.contains(ASSOCIATED_FILE_PATH))
104 {
105 String dir = null;
106 int dirStart = url.indexOf(ASSOCIATED_FILE_PATH) + ASSOCIATED_FILE_PATH.length();
107 int dirEnd = -1;
108 if (dirStart < url.length() && url.indexOf("/", dirStart) != -1)
109 {
110 dirEnd = url.indexOf("/", dirStart);
111 }
112 if (dirEnd != -1)
113 {
114 dir = url.substring(dirStart, dirEnd);
115 }
116 if (dir == null)
117 {
118 return;
119 }
120
121 String collection = null;
122 int colStart = url.indexOf(COLLECTION_FILE_PATH) + COLLECTION_FILE_PATH.length();
123 int colEnd = -1;
124 if (colStart < url.length() && url.indexOf("/", colStart) != -1)
125 {
126 colEnd = url.indexOf("/", colStart);
127 }
128 if (colEnd != -1)
129 {
130 collection = url.substring(colStart, colEnd);
131 }
132 if (collection == null)
133 {
134 return;
135 }
136
137 MessageRouter gsRouter = (MessageRouter) context.getAttribute("GSRouter");
138
139 if (gsRouter == null)
140 {
141 _logger.error("Receptionist is null, stopping filter");
142 return;
143 }
144
145 Document gsDoc = XMLConverter.newDOM();
146
147 Element metaMessage = gsDoc.createElement(GSXML.MESSAGE_ELEM);
148 Element metaRequest = GSXML.createBasicRequest(gsDoc, GSXML.REQUEST_TYPE_PROCESS, collection + "/" + METADATA_RETRIEVAL_SERVICE, new UserContext());
149 metaMessage.appendChild(metaRequest);
150
151 Element paramList = gsDoc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
152 metaRequest.appendChild(paramList);
153
154 Element param = gsDoc.createElement(GSXML.PARAM_ELEM);
155 paramList.appendChild(param);
156
157 param.setAttribute(GSXML.NAME_ATT, "metadata");
158 param.setAttribute(GSXML.VALUE_ATT, "contains");
159
160 Element docList = gsDoc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
161 metaRequest.appendChild(docList);
162
163 Element doc = gsDoc.createElement(GSXML.DOC_NODE_ELEM);
164 docList.appendChild(doc);
165
166 doc.setAttribute(GSXML.NODE_ID_ATT, dir);
167
168 Element metaResponse = (Element) gsRouter.process(metaMessage);
169
170 NodeList metadataList = metaResponse.getElementsByTagName(GSXML.METADATA_ELEM);
171 if (metadataList.getLength() == 0)
172 {
173 _logger.error("Could not find the document related to this url");
174 }
175 else
176 {
177 Element metadata = (Element) metadataList.item(0);
178 String document = metadata.getTextContent();
179
180 //Get the security info for this collection
181 Element securityMessage = gsDoc.createElement(GSXML.MESSAGE_ELEM);
182 Element securityRequest = GSXML.createBasicRequest(gsDoc, GSXML.REQUEST_TYPE_SECURITY, collection, new UserContext());
183 securityMessage.appendChild(securityRequest);
184 if (document != null && !document.equals(""))
185 {
186 securityRequest.setAttribute(GSXML.NODE_OID, document);
187 }
188
189 Element securityResponse = (Element) GSXML.getChildByTagName(gsRouter.process(securityMessage), GSXML.RESPONSE_ELEM);
190 ArrayList<String> groups = GSXML.getGroupsFromSecurityResponse(securityResponse);
191 _logger.debug("security response = "+XMLConverter.getPrettyString(securityResponse));
192
193 if (!groups.contains(""))
194 {
195 boolean found = false;
196 for (String group : groups)
197 {
198 if (((HttpServletRequest) request).isUserInRole(group))
199 {
200 found = true;
201 break;
202 }
203 }
204
205 if (!found)
206 {
207 // this just returns nothing to the browser - get no error or anything, just an empty document
208 // can we return an error page??
209 String new_url = context.getContextPath()+"/"+ context.getAttribute("LibraryName")+"?a=p&sa=error&c="+collection+"&ec=wrong_group";
210 ((HttpServletResponse)response).sendRedirect(new_url);
211 return;
212 }
213 }
214 // if got here have no groups.
215 // do we have human verify thing?
216 boolean human_verify = false;
217
218 if (!securityResponse.getAttribute("humanVerify").equals("")) {
219 // have we already done the test?
220 String hmvf_response = gRequest.getParameter("hmvf");
221 // hmvf param will be set by form
222 if (hmvf_response != null) {
223 if (!securityResponse.getAttribute("siteKey").equals("")) {
224 String recaptcha_response = gRequest.getParameter("g-recaptcha-response");
225 String secret_key = securityResponse.getAttribute("secretKey");
226 int result = Authentication.verifyRecaptcha(secret_key, recaptcha_response);
227 _logger.debug("recaptcha result code = "+result);
228 if (result == Authentication.NO_ERROR) {
229 _logger.debug("RECAPTCHA SUCCESS, hopefully going to the document");
230
231 } else {
232 _logger.error("something went wrong with recaptcha, error="+result);
233 _logger.error(Authentication.getErrorKey(result));
234 // display error page
235 String new_url = context.getContextPath()+"/"+ context.getAttribute("LibraryName")+"?a=p&sa=error&c="+collection+"&ec=recap_fail";
236 ((HttpServletResponse)response).sendRedirect(new_url);
237
238 return;
239 }
240 }
241
242 } else {
243 // hmvf param is not set - we haven't shown them the form yet
244 // we need to display the verify page
245 String new_url = context.getContextPath()+"/"+ context.getAttribute("LibraryName")+"?a=p&sa=verify&c="+collection+"&url="+url;
246 ((HttpServletResponse)response).sendRedirect(new_url);
247 return;
248 }
249 }
250 }
251 }
252 else if (url.contains(INTERFACE_PATH))
253 {
254 String fileURL = url.replaceFirst(context.getContextPath(), "");
255 File requestedFile = new File(context.getRealPath(fileURL));
256
257 if (!requestedFile.exists())
258 {
259 int interfaceNameStart = fileURL.indexOf(INTERFACE_PATH) + INTERFACE_PATH.length();
260 int interfaceNameEnd = fileURL.indexOf("/", interfaceNameStart);
261 String interfaceName = fileURL.substring(interfaceNameStart, interfaceNameEnd);
262 String interfacesDir = fileURL.substring(0, interfaceNameStart);
263 File interfaceConfigFile = new File(context.getRealPath(interfacesDir + interfaceName + "/interfaceConfig.xml"));
264
265 if (interfaceConfigFile.exists())
266 {
267 Document interfaceConfigDoc = XMLConverter.getDOM(interfaceConfigFile);
268
269 String baseInterface = interfaceConfigDoc.getDocumentElement().getAttribute("baseInterface");
270 if (baseInterface.length() > 0)
271 {
272 File baseInterfaceFile = new File(context.getRealPath(fileURL.replace("/" + interfaceName + "/", "/" + baseInterface + "/")));
273 if (baseInterfaceFile.exists())
274 {
275 ServletOutputStream out = response.getOutputStream();
276 out.write(FileUtils.readFileToByteArray(baseInterfaceFile));
277 out.flush();
278 out.close();
279 return;
280 }
281 }
282 }
283 }
284 }
285 else
286 {
287 ArrayList<String> keywords = new ArrayList<String>();
288 keywords.add(PAGE_PATH);
289 keywords.add(BROWSE_PATH);
290 keywords.add(SEARCH_PATH);
291 keywords.add(DOCUMENT_PATH);
292 //If we have a jsessionid on the end of our URL we want to ignore it
293 int index;
294 if ((index = url.indexOf(";jsessionid")) != -1)
295 {
296 url = url.substring(0, index);
297 }
298 String[] segments = url.split("/");
299 for (int i = 0; i < segments.length; i++)
300 {
301 String[] additionalParameters = null;
302 String[] defaultParamValues = null;
303 //COLLECTION
304 if (segments[i].equals(COLLECTION_PATH) && (i + 1) < segments.length) {
305 int j=i+1;
306 while(j+1 < segments.length && !keywords.contains(segments[j+1])) {
307 j++;
308 }
309
310 if (j>i+1) {
311 // we had a group part
312 String [] groups = Arrays.copyOfRange(segments, i+1, j);
313 String group = StringUtils.join(groups, "/");
314 gRequest.setParameter(GSParams.GROUP, group);
315 }
316 gRequest.setParameter(GSParams.COLLECTION, segments[j]);
317 }
318 // GROUP
319 else if(segments[i].equals(GROUP_PATH) && (i + 1) < segments.length)
320 {
321 // assume for now, no other path parts for group links
322 int j= segments.length - 1;
323 String group;
324 if (j==i+1) {
325 group = segments[j];
326 } else {
327 String [] groups = Arrays.copyOfRange(segments, i+1, j+1);
328 group = StringUtils.join(groups, "/");
329 }
330 gRequest.setParameter(GSParams.GROUP, group);
331 gRequest.setParameter(GSParams.ACTION, "p");
332 gRequest.setParameter(GSParams.SUBACTION, "home");
333
334 }
335 //DOCUMENT
336 else if (segments[i].equals(DOCUMENT_PATH) && (i + 1) < segments.length)
337 {
338 gRequest.setParameter(GSParams.DOCUMENT, segments[i + 1]);
339
340 additionalParameters = new String[] { GSParams.ACTION };
341 defaultParamValues = new String[] { "d" };
342 if ((i+2) < segments.length && segments[i+2].equals("print")) {
343 gRequest.setParameter(GSParams.SUBACTION, "print");
344 gRequest.setParameter("ed", "1");
345
346 }
347
348 }
349 //PAGE
350 else if (segments[i].equals(PAGE_PATH) && (i + 1) < segments.length)
351 {
352 gRequest.setParameter(GSParams.SUBACTION, segments[i + 1]);
353
354 additionalParameters = new String[] { GSParams.ACTION };
355 defaultParamValues = new String[] { "p" };
356 }
357 //SYSTEM
358 else if (segments[i].equals(SYSTEM_PATH) && (i + 1) < segments.length)
359 {
360 String sa = segments[i + 1];
361 if (sa.equals(SYSTEM_SUBACTION_CONFIGURE) || sa.equals(SYSTEM_SUBACTION_RECONFIGURE))
362 {
363 sa = "c";
364 }
365 else if (sa.equals(SYSTEM_SUBACTION_ACTIVATE))
366 {
367 sa = "a";
368 }
369 else if (sa.equals(SYSTEM_SUBACTION_DEACTIVATE))
370 {
371 sa = "d";
372 }
373
374 if (sa.equals("c") && (i + 2) < segments.length)
375 {
376 gRequest.setParameter(GSParams.SYSTEM_CLUSTER, segments[i + 2]);
377 }
378
379 if (sa.equals("a") && (i + 2) < segments.length)
380 {
381 gRequest.setParameter(GSParams.SYSTEM_MODULE_TYPE, "collection");
382 gRequest.setParameter(GSParams.SYSTEM_MODULE_NAME, segments[i + 2]);
383 }
384
385 if (sa.equals("d") && (i + 2) < segments.length)
386 {
387 gRequest.setParameter(GSParams.SYSTEM_CLUSTER, segments[i + 2]);
388 }
389
390 gRequest.setParameter(GSParams.SUBACTION, sa);
391
392 additionalParameters = new String[] { GSParams.ACTION };
393 defaultParamValues = new String[] { "s" };
394 }
395 //ADMIN
396 else if (segments[i].equals("admin") && (i + 1) < segments.length)
397 {
398 String pageName = segments[i + 1];
399
400 gRequest.setParameter("s1.authpage", pageName);
401
402 additionalParameters = new String[] { GSParams.ACTION, GSParams.REQUEST_TYPE, GSParams.SUBACTION, GSParams.SERVICE };
403 defaultParamValues = new String[] { "g", "r", "authen", "Authentication" };
404 }
405 //BROWSE
406 else if (segments[i].equals(BROWSE_PATH) && (i + 1) < segments.length)
407 {
408 String cl = "";
409 for (int j = 1; (i + j) < segments.length; j++)
410 {
411 String currentSegment = segments[i + j].replace("CL", "").replace("cl", "");
412 if (currentSegment.contains("."))
413 {
414 String[] subsegments = currentSegment.split("\\.");
415 for (String subsegment : subsegments)
416 {
417 subsegment = subsegment.replace("CL", "").replace("cl", "");
418
419 if (cl.length() > 0)
420 {
421 cl += ".";
422 }
423
424 if (subsegment.length() > 0)
425 {
426 cl += subsegment;
427 }
428 }
429 continue;
430 }
431 if (!currentSegment.matches("^(CL|cl)?\\d+$"))
432 {
433 continue;
434 }
435
436 if (cl.length() > 0)
437 {
438 cl += ".";
439 }
440
441 cl += currentSegment;
442 }
443
444 gRequest.setParameter("cl", "CL" + cl);
445
446 additionalParameters = new String[] { GSParams.ACTION, GSParams.REQUEST_TYPE, GSParams.SERVICE };
447 defaultParamValues = new String[] { "b", "s", "ClassifierBrowse" };
448 }
449 //QUERY
450 else if (segments[i].equals(SEARCH_PATH))
451 {
452 String serviceName = "";
453 if ((i + 1) < segments.length)
454 {
455 serviceName = segments[i + 1];
456 gRequest.setParameter("s", serviceName);
457
458 additionalParameters = new String[] { GSParams.ACTION, GSParams.SUBACTION, GSParams.REQUEST_TYPE };
459 defaultParamValues = new String[] { "q", "", "d" };
460 }
461 if ((i + 2) < segments.length)
462 {
463 if (serviceName.equals("TextQuery") || serviceName.equals("RawQuery"))
464 {
465
466 gRequest.setParameter("s1.query", segments[i + 2]);
467 }
468 else if (serviceName.equals("FieldQuery"))
469 {
470 gRequest.setParameter("s1.fqv", segments[i + 2]);
471 }
472 else if (serviceName.equals("AdvancedFieldQuery"))
473 {
474 gRequest.setParameter("s1.fqv", segments[i + 2]);
475 }
476 }
477 }
478 if (additionalParameters != null)
479 {
480 for (int j = 0; j < additionalParameters.length; j++)
481 {
482 if (gRequest.getParameter(additionalParameters[j]) == null)
483 {
484 gRequest.setParameter(additionalParameters[j], defaultParamValues[j]);
485 }
486 }
487 }
488 }
489 }
490
491 chain.doFilter(gRequest, response);
492 }
493 else
494 {
495 //Will this ever happen?
496 System.err.println("The request was not an HttpServletRequest");
497 }
498 }
499
500 private boolean isURLRestricted(String url)
501 {
502 for (String restrictedURL : _restrictedURLs)
503 {
504 if (url.matches(".*" + restrictedURL + ".*"))
505 {
506 return true;
507 }
508 }
509
510 return false;
511 }
512
513}
Note: See TracBrowser for help on using the repository browser.