package org.greenstone.gsdl3.core; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import javax.servlet.Filter; import javax.servlet.FilterChain; import javax.servlet.FilterConfig; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.ServletOutputStream; import javax.servlet.ServletRequest; import javax.servlet.ServletResponse; import javax.servlet.http.HttpSession; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequestWrapper; import javax.servlet.http.HttpServletResponse; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.greenstone.gsdl3.util.GSParams; import org.greenstone.gsdl3.util.GSPath; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.UserContext; import org.greenstone.gsdl3.util.XMLConverter; import org.greenstone.gsdl3.service.Authentication; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; public class URLFilter implements Filter { private FilterConfig _filterConfig = null; private static Logger _logger = Logger.getLogger(org.greenstone.gsdl3.core.URLFilter.class.getName()); //Restricted URLs protected static final String SITECONFIG_URL = "sites/[^/]+/siteConfig.xml"; protected static final String USERS_DB_URL = "etc/usersDB/.*"; protected static final ArrayList _restrictedURLs; static { ArrayList restrictedURLs = new ArrayList(); restrictedURLs.add(SITECONFIG_URL); restrictedURLs.add(USERS_DB_URL); _restrictedURLs = restrictedURLs; } //Constants protected static final String DOCUMENT_PATH = "document"; protected static final String COLLECTION_PATH = "collection"; protected static final String GROUP_PATH = "group"; protected static final String PAGE_PATH = "page"; protected static final String SYSTEM_PATH = "system"; protected static final String BROWSE_PATH = "browse"; protected static final String SEARCH_PATH = "search"; protected static final String METADATA_RETRIEVAL_SERVICE = "DocumentMetadataRetrieve"; protected static final String ASSOCIATED_FILE_PATH = "/index/assoc/"; protected static final String COLLECTION_FILE_PATH = "/collect/"; protected static final String INTERFACE_PATH = "/interfaces/"; protected static final String SITES_PATH = "/sites/"; protected static final String SYSTEM_SUBACTION_CONFIGURE = "configure"; protected static final String SYSTEM_SUBACTION_RECONFIGURE = "reconfigure"; protected static final String SYSTEM_SUBACTION_ACTIVATE = "activate"; protected static final String SYSTEM_SUBACTION_DEACTIVATE = "deactivate"; public void init(FilterConfig filterConfig) throws ServletException { this._filterConfig = filterConfig; } public void destroy() { this._filterConfig = null; } @SuppressWarnings("deprecation") public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { if (!(request instanceof HttpServletRequest)) { // Can this ever happen? _logger.error("The request was not an HttpServletRequest"); return; } HttpServletRequest hRequest = ((HttpServletRequest) request); HttpSession hSession = hRequest.getSession(); ServletContext context = hSession.getServletContext(); GSHttpServletRequestWrapper gRequest = new GSHttpServletRequestWrapper(hRequest); // this is the part before the ? String url = hRequest.getRequestURI().toString(); if (isURLRestricted(url)) { // TODO - should we make this a proper HTML page? response.getWriter().println("ERROR: Access to this page is forbidden."); return; } // Run security checks on files requested from a collection's index/assoc folder if (url.contains(ASSOCIATED_FILE_PATH)) { // now we need to get library name from the path, which is like // /greenstone3/library/sites/localsite/collect/collname/index/assoc/... String library_name = url.replaceFirst(context.getContextPath(), ""); library_name = library_name.substring(0, library_name.indexOf(SITES_PATH)); if (library_name.equals("")) { response.getWriter().println("ERROR: Assoc file paths must now contain the library name"); return; } // remove initial '/' library_name = library_name.substring(1); MessageRouter gsRouter = (MessageRouter) context.getAttribute(library_name+"Router"); if (gsRouter == null) { _logger.error("Receptionist is null, stopping filter"); return; } // Sometimes we have a // before the filename - that mucks up the following code, so lets remove them url = url.replaceAll("//","/"); String dir = null; int dirStart = url.indexOf(ASSOCIATED_FILE_PATH) + ASSOCIATED_FILE_PATH.length(); int dirEnd = -1; if (dirStart < url.length() && url.indexOf("/", dirStart) != -1) { dirEnd = url.lastIndexOf("/"); } if (dirEnd != -1) { dir = url.substring(dirStart, dirEnd); } if (dir == null) { return; } String collection = null; int colStart = url.indexOf(COLLECTION_FILE_PATH) + COLLECTION_FILE_PATH.length(); int colEnd = -1; if (colStart < url.length() && url.indexOf("/", colStart) != -1) { colEnd = url.indexOf("/", colStart); } if (colEnd != -1) { collection = url.substring(colStart, colEnd); } if (collection == null) { return; } String file_name = url.substring(url.lastIndexOf("/")+1); // Query the MR with a request for the contains metadata for node "dir" - where dir is the assocfilepath // In the jdbm db, have entries like // [HASH1552e] // HASH1552e3sdlkjf7sdfsdfk // mapping assocfilepath to doc id String document = queryMRforDOCID(gsRouter, collection, dir); if (document == null) { response.getWriter().println("ERROR: Couldn't find the document associated with assocfilepath: "+dir); return; } //Query the MR for the security info for this document - can we show it? Or do we need to be logged in? // Or do we need to throw up the verify page? // While we are doing this, query the document for its srclinkFile metadata - then we can determine if the // file we are being asked for is the main doc (eg pdf) or just a supporting image on the page //Get the security info for this collection Document gsDoc = XMLConverter.newDOM(); Element securityMessage = gsDoc.createElement(GSXML.MESSAGE_ELEM); Element securityRequest = GSXML.createBasicRequest(gsDoc, GSXML.REQUEST_TYPE_SECURITY, collection, new UserContext()); securityMessage.appendChild(securityRequest); securityRequest.setAttribute(GSXML.NODE_OID, document); // get the srclinkFile for the document Element metadata_request = GSXML.createBasicRequest(gsDoc, GSXML.REQUEST_TYPE_PROCESS, GSPath.appendLink(collection, "DocumentMetadataRetrieve"), new UserContext()); Element param_list = gsDoc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); GSXML.addParameterToList(param_list, "metadata", "srclinkFile"); metadata_request.appendChild(param_list); Element doc_list = gsDoc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); metadata_request.appendChild(doc_list); Element d = gsDoc.createElement(GSXML.DOC_NODE_ELEM); d.setAttribute(GSXML.NODE_ID_ATT, document); doc_list.appendChild(d); securityMessage.appendChild(metadata_request); Element mr_response = (Element)gsRouter.process(securityMessage); _logger.debug("security response = "+XMLConverter.getPrettyString(mr_response)); boolean verifiable_file = true; // TODO check for errors Element meta_response = (Element) GSXML.getNamedElement(mr_response, GSXML.RESPONSE_ELEM, GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); Element metadata_list = (Element)meta_response.getElementsByTagName(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER).item(0); String srcdoc = GSXML.getMetadataValue(metadata_list, "srclinkFile"); if (!srcdoc.equals(file_name)) { // the specified file is just a supporting file, not the main file. // eg an image in an html doc. verifiable_file = false; } Element securityResponse = (Element) GSXML.getNamedElement(mr_response, GSXML.RESPONSE_ELEM, GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_SECURITY); ArrayList groups = GSXML.getGroupsFromSecurityResponse(securityResponse); if (!groups.contains("")) { boolean found = false; for (String group : groups) { if (((HttpServletRequest) request).isUserInRole(group)) { found = true; break; } } if (!found) { // return an error page to the browser String new_url = context.getContextPath()+"/"+ library_name+"?a=p&sa=error&c="+collection+"&ec=wrong_group"; ((HttpServletResponse)response).sendRedirect(new_url); return; } } // if got here have no groups. // do we have human verify thing? if (verifiable_file) { // we are asking for the main document - lets check human verify if (!securityResponse.getAttribute(GSXML.VERIFY_ATT).equals("")) { // have we done the test previously? HttpSession this_session = ((HttpServletRequest) request).getSession(); if (this_session == null) { _logger.error("KATH session is null"); } else { _logger.error("KATH session id = "+this_session.getId()); } if (this_session.getAttribute(GSParams.VERIFIED) != null ) { _logger.error("KATH have verified in the session"); // we don't need to re-verify } else { _logger.error("KATH verfied not in session"); // have we just done the test? String hmvf_response = gRequest.getParameter(GSParams.VERIFIED); // hmvf param will be set by form if the verify page was submitted if (hmvf_response != null && hmvf_response.equals("1")) { if (!securityResponse.getAttribute(GSXML.SITE_KEY_ATT).equals("")) { String recaptcha_response = gRequest.getParameter(Authentication.RECAPTCHA_RESPONSE_PARAM); String secret_key = securityResponse.getAttribute(GSXML.SECRET_KEY_ATT); int result = Authentication.verifyRecaptcha(secret_key, recaptcha_response); _logger.debug("recaptcha result code = "+result); if (result == Authentication.NO_ERROR) { _logger.debug("RECAPTCHA SUCCESS, hopefully going to the document"); this_session.setAttribute(GSParams.VERIFIED, "1"); } else { _logger.error("something went wrong with recaptcha, error="+result); _logger.error(Authentication.getErrorKey(result)); // display error page //String new_url = context.getContextPath()+"/"+ context.getAttribute("LibraryName")+"?a=p&sa=error&c="+collection+"&ec=recap_fail"; String new_url = context.getContextPath()+"/"+ library_name+"?a=p&sa=error&c="+collection+"&ec=recap_fail"; ((HttpServletResponse)response).sendRedirect(new_url); return; } } } else { // hmvf param is not set - we haven't shown them the form yet // we need to display the verify page //String new_url = context.getContextPath()+"/"+ context.getAttribute("LibraryName")+"?a=p&sa=verify&c="+collection+"&url="+url; String new_url = context.getContextPath()+"/"+ library_name+"?a=p&sa=verify&c="+collection+"&url="+url; ((HttpServletResponse)response).sendRedirect(new_url); return; } } } } // if we got here, we have passed all security checks and just want to view the file. // However, we need to remove the library_name from the URL. As can't change the // existing URL, we need to forward to the new one. // Remove the context and library name parts. // don't know what happens with the rest of the filter chain? Does this bypass that?? url = url.replaceFirst(context.getContextPath(), ""); url = url.replaceFirst("/"+library_name, ""); request.getRequestDispatcher(url).forward(request, response); return; } else if (url.contains(INTERFACE_PATH)) { String fileURL = url.replaceFirst(context.getContextPath(), ""); File requestedFile = new File(context.getRealPath(fileURL)); if (!requestedFile.exists()) { int interfaceNameStart = fileURL.indexOf(INTERFACE_PATH) + INTERFACE_PATH.length(); int interfaceNameEnd = fileURL.indexOf("/", interfaceNameStart); String interfaceName = fileURL.substring(interfaceNameStart, interfaceNameEnd); String interfacesDir = fileURL.substring(0, interfaceNameStart); File interfaceConfigFile = new File(context.getRealPath(interfacesDir + interfaceName + "/interfaceConfig.xml")); if (interfaceConfigFile.exists()) { Document interfaceConfigDoc = XMLConverter.getDOM(interfaceConfigFile); String baseInterface = interfaceConfigDoc.getDocumentElement().getAttribute("baseInterface"); if (baseInterface.length() > 0) { File baseInterfaceFile = new File(context.getRealPath(fileURL.replace("/" + interfaceName + "/", "/" + baseInterface + "/"))); if (baseInterfaceFile.exists()) { ServletOutputStream out = response.getOutputStream(); out.write(FileUtils.readFileToByteArray(baseInterfaceFile)); out.flush(); out.close(); return; } } } } } else { ArrayList keywords = new ArrayList(); keywords.add(PAGE_PATH); keywords.add(BROWSE_PATH); keywords.add(SEARCH_PATH); keywords.add(DOCUMENT_PATH); //If we have a jsessionid on the end of our URL we want to ignore it int index; if ((index = url.indexOf(";jsessionid")) != -1) { url = url.substring(0, index); } String[] segments = url.split("/"); for (int i = 0; i < segments.length; i++) { String[] additionalParameters = null; String[] defaultParamValues = null; //COLLECTION if (segments[i].equals(COLLECTION_PATH) && (i + 1) < segments.length) { int j=i+1; while(j+1 < segments.length && !keywords.contains(segments[j+1])) { j++; } if (j>i+1) { // we had a group part String [] groups = Arrays.copyOfRange(segments, i+1, j); String group = StringUtils.join(groups, "/"); gRequest.setParameter(GSParams.GROUP, group); } gRequest.setParameter(GSParams.COLLECTION, segments[j]); } // GROUP else if(segments[i].equals(GROUP_PATH) && (i + 1) < segments.length) { // assume for now, no other path parts for group links int j= segments.length - 1; String group; if (j==i+1) { group = segments[j]; } else { String [] groups = Arrays.copyOfRange(segments, i+1, j+1); group = StringUtils.join(groups, "/"); } gRequest.setParameter(GSParams.GROUP, group); gRequest.setParameter(GSParams.ACTION, "p"); gRequest.setParameter(GSParams.SUBACTION, "home"); } //DOCUMENT else if (segments[i].equals(DOCUMENT_PATH) && (i + 1) < segments.length) { gRequest.setParameter(GSParams.DOCUMENT, segments[i + 1]); additionalParameters = new String[] { GSParams.ACTION }; defaultParamValues = new String[] { "d" }; if ((i+2) < segments.length && segments[i+2].equals("print")) { gRequest.setParameter(GSParams.SUBACTION, "print"); gRequest.setParameter("ed", "1"); } } //PAGE else if (segments[i].equals(PAGE_PATH) && (i + 1) < segments.length) { gRequest.setParameter(GSParams.SUBACTION, segments[i + 1]); additionalParameters = new String[] { GSParams.ACTION }; defaultParamValues = new String[] { "p" }; } //SYSTEM else if (segments[i].equals(SYSTEM_PATH) && (i + 1) < segments.length) { String sa = segments[i + 1]; if (sa.equals(SYSTEM_SUBACTION_CONFIGURE) || sa.equals(SYSTEM_SUBACTION_RECONFIGURE)) { sa = "c"; } else if (sa.equals(SYSTEM_SUBACTION_ACTIVATE)) { sa = "a"; } else if (sa.equals(SYSTEM_SUBACTION_DEACTIVATE)) { sa = "d"; } if (sa.equals("c") && (i + 2) < segments.length) { gRequest.setParameter(GSParams.SYSTEM_CLUSTER, segments[i + 2]); } if (sa.equals("a") && (i + 2) < segments.length) { gRequest.setParameter(GSParams.SYSTEM_MODULE_TYPE, "collection"); gRequest.setParameter(GSParams.SYSTEM_MODULE_NAME, segments[i + 2]); } if (sa.equals("d") && (i + 2) < segments.length) { gRequest.setParameter(GSParams.SYSTEM_CLUSTER, segments[i + 2]); } gRequest.setParameter(GSParams.SUBACTION, sa); additionalParameters = new String[] { GSParams.ACTION }; defaultParamValues = new String[] { "s" }; } //ADMIN else if (segments[i].equals("admin") && (i + 1) < segments.length) { String pageName = segments[i + 1]; gRequest.setParameter("s1.authpage", pageName); additionalParameters = new String[] { GSParams.ACTION, GSParams.REQUEST_TYPE, GSParams.SUBACTION, GSParams.SERVICE }; defaultParamValues = new String[] { "g", "r", "authen", "Authentication" }; } //BROWSE else if (segments[i].equals(BROWSE_PATH) && (i + 1) < segments.length) { String cl = ""; for (int j = 1; (i + j) < segments.length; j++) { String currentSegment = segments[i + j].replace("CL", "").replace("cl", ""); if (currentSegment.contains(".")) { String[] subsegments = currentSegment.split("\\."); for (String subsegment : subsegments) { subsegment = subsegment.replace("CL", "").replace("cl", ""); if (cl.length() > 0) { cl += "."; } if (subsegment.length() > 0) { cl += subsegment; } } continue; } if (!currentSegment.matches("^(CL|cl)?\\d+$")) { continue; } if (cl.length() > 0) { cl += "."; } cl += currentSegment; } gRequest.setParameter("cl", "CL" + cl); additionalParameters = new String[] { GSParams.ACTION, GSParams.REQUEST_TYPE, GSParams.SERVICE }; defaultParamValues = new String[] { "b", "s", "ClassifierBrowse" }; } //QUERY else if (segments[i].equals(SEARCH_PATH)) { String serviceName = ""; if ((i + 1) < segments.length) { serviceName = segments[i + 1]; gRequest.setParameter("s", serviceName); additionalParameters = new String[] { GSParams.ACTION, GSParams.SUBACTION, GSParams.REQUEST_TYPE }; defaultParamValues = new String[] { "q", "", "d" }; } if ((i + 2) < segments.length) { if (serviceName.equals("TextQuery") || serviceName.equals("RawQuery")) { gRequest.setParameter("s1.query", segments[i + 2]); } else if (serviceName.equals("FieldQuery")) { gRequest.setParameter("s1.fqv", segments[i + 2]); } else if (serviceName.equals("AdvancedFieldQuery")) { gRequest.setParameter("s1.fqv", segments[i + 2]); } } } if (additionalParameters != null) { for (int j = 0; j < additionalParameters.length; j++) { if (gRequest.getParameter(additionalParameters[j]) == null) { gRequest.setParameter(additionalParameters[j], defaultParamValues[j]); } } } } } chain.doFilter(gRequest, response); } private boolean isURLRestricted(String url) { for (String restrictedURL : _restrictedURLs) { if (url.matches(".*" + restrictedURL + ".*")) { return true; } } return false; } private String queryMRforDOCID(MessageRouter gsRouter, String collection, String assocfiledir) { Document gsDoc = XMLConverter.newDOM(); Element metaMessage = gsDoc.createElement(GSXML.MESSAGE_ELEM); Element metaRequest = GSXML.createBasicRequest(gsDoc, GSXML.REQUEST_TYPE_PROCESS, collection + "/" + METADATA_RETRIEVAL_SERVICE, new UserContext()); metaMessage.appendChild(metaRequest); Element paramList = gsDoc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); metaRequest.appendChild(paramList); Element param = gsDoc.createElement(GSXML.PARAM_ELEM); param.setAttribute(GSXML.NAME_ATT, "metadata"); param.setAttribute(GSXML.VALUE_ATT, "contains"); paramList.appendChild(param); Element docList = gsDoc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); metaRequest.appendChild(docList); Element doc = gsDoc.createElement(GSXML.DOC_NODE_ELEM); doc.setAttribute(GSXML.NODE_ID_ATT, assocfiledir); docList.appendChild(doc); Element metaResponse = (Element) gsRouter.process(metaMessage); NodeList metadataList = metaResponse.getElementsByTagName(GSXML.METADATA_ELEM); if (metadataList.getLength() == 0) { _logger.error("Could not find the document related to this url"); return null; } Element metadata = (Element) metadataList.item(0); String document = metadata.getTextContent(); if (document != null && document.equals("")) { document = null; } return document; } }