/*
 *    GLIEntityResolver.java
 *    Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.greenstone.gatherer.util;

import org.greenstone.gatherer.Configuration;
import org.greenstone.gatherer.DebugStream;
import org.greenstone.gatherer.Gatherer;

import org.xml.sax.InputSource;
import org.xml.sax.EntityResolver;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * Uses a list of default search paths, or the file's own path if provided, to resolve entities referenced
 * in the file. The search paths, including the file's own, have to be within GS.
 * By default, the GS3HOME/WEB-INF and toplevel GLI user directory are added to the list of search paths,
 * since this EntityResolver is used for web.xml to resolve its inclusion of servlets.xml, which can be
 * located in GS3HOME/WEB-INF for a local GLI or is extracted into the gli user directory for client-GLI.
 * If everything fails, it will try to resolve entities using the classloader, not otherwise used by GLI,
 * but this part has been copied over from GS3's GLIEntityResolver.java.
 * If ever needed, maybe to make things faster, can maintain a static list of default search paths.
 */
public class GLIEntityResolver implements EntityResolver {

    /** Directories that are searched, in order, for an entity's file name. */
    protected ArrayList<File> list_of_local_search_paths = new ArrayList<File>();

    /** Loader used as the last resort; lazily set to this class's own loader when null. */
    ClassLoader class_loader = null;

    /**
     * Creates a resolver that only knows the default search paths.
     */
    public GLIEntityResolver() {
        // Add the basic search paths:
        // add the GS3 web/WEB-INF folder as search location for servlets.xml, which web.xml
        // includes and which is the reason for adding a GLIEntityResolver class into GLI
        if (Gatherer.GS3) {
            list_of_local_search_paths.add(new File(Configuration.gsdl3_path, "WEB-INF"));
        }

        // Add gli user dir for remote GS, since that is where the
        // web.xml and server.xml downloaded from the remote server will be unpacked to
        if (Gatherer.isGsdlRemote) {
            list_of_local_search_paths.add(new File(Configuration.gli_user_directory_path));
        }
    }

    /**
     * Creates a resolver with the default search paths, preceded by the
     * directory containing the given file (if that file is within GS).
     *
     * @param file the XML file whose entities are going to be resolved
     */
    public GLIEntityResolver(File file) {
        this(); // adds default search paths
        // add file's own path as first in search list
        addSafeSearchPath(file, true);
    }

    /* Methods with the ClassLoader parameter are unused at present */

    /**
     * Creates a resolver that uses the given class loader. Note that this
     * constructor does not add the default search paths.
     *
     * @param loader the class loader to fall back on
     */
    public GLIEntityResolver(ClassLoader loader) {
        this.class_loader = loader;
    }

    /**
     * Creates a resolver with the default search paths, the directory of the
     * given file, and the given class loader as last resort.
     *
     * @param f      the XML file whose entities are going to be resolved
     * @param loader the class loader to fall back on
     */
    public GLIEntityResolver(File f, ClassLoader loader) {
        this(f);
        this.class_loader = loader;
    }

    /**
     * Replaces the class loader used as last resort.
     *
     * @param loader the class loader to fall back on; null means this class's own loader
     */
    public void setClassLoader(ClassLoader loader) {
        this.class_loader = loader;
    }

    /**
     * Adds the directory containing the given file to the list of search
     * paths, provided the file lies within the Greenstone installation and
     * the directory is not already listed.
     *
     * @param file    the file whose parent directory is to be searched
     * @param prepend true to search the directory first, false to search it last
     */
    public void addSafeSearchPath(File file, boolean prepend) {
        if (file == null) {
            DebugStream.println("### No file given. Not adding to list of search paths.");
            return;
        }

        // GS2 and GS3 each have their own installation root
        String gs_root = Gatherer.GS3 ? Configuration.gsdl3_src_path : Configuration.gsdl_path;
        if (!hasPathPrefix(file, gs_root)) {
            DebugStream.println("### Location of file " + file + " not within GS. Not adding to list of search paths.");
            return;
        }

        // Go through the absolute form: a bare relative file name has no parent of its own,
        // and a null entry in the list would break the search in resolveEntity()
        File path = file.getAbsoluteFile().getParentFile();
        if (path == null) {
            DebugStream.println("### File " + file + " has no parent folder. Not adding to list of search paths.");
            return;
        }

        if (!list_of_local_search_paths.contains(path)) {
            if (prepend) {
                list_of_local_search_paths.add(0, path);
            } else {
                list_of_local_search_paths.add(path);
            }
        }
    }

    /**
     * Attempts to locate the entity denoted by the system identifier: first at
     * the location the identifier itself points to (accepted only when inside
     * GS), then by file name in the list of search paths, and finally through
     * the class loader.
     *
     * resolveEntity() is not called for every file:
     * http://www.postseek.com/meta/37735b65e6a459a6aa631f048cc5a0b6
     *
     * "I think the parser will call [resolveEntity() on an EntityResolver that has been set], if
     * [the parser] is unable to find the DTD that your XML file refers to. So if you try to parse
     * an XML file that doesn't refer to a DTD, it won't be called. And if you try to parse an XML
     * file where the parser can find the DTD, it won't be called either."
     *
     * @param public_id the public identifier of the entity (not used)
     * @param system_id the system identifier of the entity
     * @return an InputSource for the located entity, or null if it could not be located here
     */
    public InputSource resolveEntity(String public_id, String system_id) {
        DebugStream.println("### resolveEntity() called for " + system_id);
        if (system_id == null) {
            return null; // nothing to go on
        }

        String temp_id = system_id;
        if (temp_id.startsWith("file://")) {
            // A File cannot be built from the URL string itself: the scheme has to be taken off first
            File f = fileFromSystemId(system_id);
            if (f.exists()) {
                // check if inside GS2 or GS3 installation
                if (hasPathPrefix(f, Configuration.gsdl_path) || hasPathPrefix(f, Configuration.gsdl3_src_path)) {
                    DebugStream.println("### file denoted by systemID is inside GS: " + f.getAbsolutePath());
                    return new InputSource(system_id); // problem solved
                } else {
                    DebugStream.println("\t### file denoted by systemID exists, but not located inside GS: " + f.getAbsolutePath());
                }
            } else {
                temp_id = f.getName();

                // check in list of search paths
                for (File searchPath : list_of_local_search_paths) {
                    DebugStream.println("### searching for entity '" + temp_id + "' in: " + searchPath.getAbsolutePath());
                    File searchFile = new File(searchPath, temp_id);
                    if (searchFile.exists()) {
                        DebugStream.println("\t Found " + searchFile.getAbsolutePath());
                        String newpath = searchFile.getAbsolutePath();
                        if (Utility.isWindows()) {
                            newpath = "file:///" + newpath.replace("\\", "/");
                        } else {
                            // linux version, file protocol starts with file:// and slashes are already URL-style
                            newpath = "file://" + newpath;
                        }
                        return new InputSource(newpath);
                    }
                }
                // else, external entity/file denoted by systemid is not in list of search paths, try classloader
            }
        } else {
            DebugStream.println("### Entity is not a file: " + system_id);
            if (temp_id.indexOf("/") != -1) {
                temp_id = temp_id.substring(temp_id.lastIndexOf("/") + 1);
            }
        }

        DebugStream.println("### Using classloader to attempt to resolve entity: " + temp_id);
        // try using a class loader. If none provided, use current class loader
        if (this.class_loader == null) {
            this.class_loader = this.getClass().getClassLoader();
        }
        URL url = class_loader.getResource(temp_id);
        if (url == null) {
            return null;
        }
        if ("file".equals(url.getProtocol())) {
            return new InputSource("file://" + url.getFile());
        }
        // e.g. a resource inside a jar: prefixing "file://" to it would give an unusable identifier
        return new InputSource(url.toExternalForm());
    }

    /** Tells whether the file's absolute path starts with the given (possibly null) path string. */
    private static boolean hasPathPrefix(File file, String path_prefix) {
        // plain string prefix comparison, with a null prefix treated as "no match"
        return path_prefix != null && file.getAbsolutePath().startsWith(path_prefix);
    }

    /** Converts a system identifier starting with "file://" into the File it denotes. */
    private static File fileFromSystemId(String system_id) {
        try {
            return new File(new URI(system_id));
        } catch (URISyntaxException ignored) {
            // not a well-formed URI (unescaped spaces, for instance): strip the scheme by hand below
        } catch (IllegalArgumentException ignored) {
            // well-formed, but not of the form File accepts: strip the scheme by hand below
        }

        String path = system_id.substring("file://".length());
        // "/C:/folder/file" has to become "C:/folder/file" on windows
        if (Utility.isWindows() && path.length() >= 3 && path.charAt(0) == '/'
            && Character.isLetter(path.charAt(1)) && path.charAt(2) == ':') {
            path = path.substring(1);
        }
        return new File(path);
    }
}