source: main/trunk/gli/src/org/greenstone/gatherer/util/GLIEntityResolver.java@ 29992

Last change on this file since 29992 was 29992, checked in by ak19, 9 years ago

Minor correction to comment.

File size: 6.8 KB
Line 
1/*
2 * GLIEntityResolver.java
3 * Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gatherer.util;
21
import org.greenstone.gatherer.Configuration;
import org.greenstone.gatherer.DebugStream;
import org.greenstone.gatherer.Gatherer;

import org.xml.sax.InputSource;
import org.xml.sax.EntityResolver;
import java.io.File;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
32
33
34/**
35 * Uses a list of default search paths, or the file's own path if provided, to resolve entities referenced
36 * in the file. The search paths, including the file's own, have to be within GS.
37 * By default, the GS3HOME/WEB-INF and toplevel GLI user directory are added to the list of search paths,
38 * since this EntityResolver is used for web.xml to resolve its inclusion of servlets.xml, which can be
39 * located in GS3HOME/WEB-INF for a local GLI or is extracted into the gli user directory for client-GLI.
40 * If everything fails, it will try to resolve entities using the classloader, not otherwise used by GLI,
41 * but this part has been copied over from GS3's GSEntityResolver.java.
42 * If ever needed, maybe to make things faster, can maintain a static list of default search paths.
43 */
44public class GLIEntityResolver implements EntityResolver {
45
46 protected ArrayList<File> list_of_local_search_paths = new ArrayList<File>();
47 ClassLoader class_loader = null;
48
49
50 public GLIEntityResolver() {
51 // Add the basic search paths:
52
53 // add the GS3 web/WEB-INF folder as search location for servlets.xml, which web.xml
54 // includes and which is the reason for adding a GLIEntityResolver class into GLI
55
56 if(Gatherer.GS3) {
57 //list_of_local_search_paths.add(new File(Configuration.gsdl3_path));
58 list_of_local_search_paths.add(new File(Configuration.gsdl3_path, "WEB-INF"));
59 }
60
61 // Add gli user dir for remote GS, since that is where the
62 // web.xml and server.xml downloaded from the remote server will be unpacked to
63 if(Gatherer.isGsdlRemote) {
64 list_of_local_search_paths.add(new File(Configuration.gli_user_directory_path));
65 }
66 }
67
68 public GLIEntityResolver(File file) {
69 this(); // adds default search paths
70
71 // add file's own path as first in search list
72 addSafeSearchPath(file, true);
73 }
74
75 /* Methods with the ClassLoader parameter are unused at present */
76 public GLIEntityResolver(ClassLoader loader) {
77 this.class_loader = loader;
78 }
79
80 public GLIEntityResolver(File f, ClassLoader loader) {
81 this(f);
82 this.class_loader = loader;
83 }
84
85 public void setClassLoader(ClassLoader loader) {
86 this.class_loader = loader;
87 }
88
89 public void addSafeSearchPath(File file, boolean prepend) {
90
91 // add the file's directory to list of search paths,
92 // if it is within the greenstone installation and not already in the list
93 if((!Gatherer.GS3 && file.getAbsolutePath().startsWith(Configuration.gsdl_path))
94 || (Gatherer.GS3 && file.getAbsolutePath().startsWith(Configuration.gsdl3_src_path))) {
95
96 File path = file.getParentFile();
97 if(!list_of_local_search_paths.contains(path)) {
98 if(prepend) {
99 list_of_local_search_paths.add(0, path);
100 } else {
101 list_of_local_search_paths.add(path);
102 }
103 }
104 } else {
105 DebugStream.println("### Location of file " + file + " not within GS. Not adding to list of search paths.");
106 }
107 }
108
109 /**
110 * resolveEntity() is not called for every file:
111 * http://www.postseek.com/meta/37735b65e6a459a6aa631f048cc5a0b6
112 *
113 * "I think the parser will call [resolveEntity() on an EntityResolver that has been set], if
114 * [the parser] is unable to find the DTD that your XML file refers to. So if you try to parse
115 * an XML file that doesn't refer to a DTD, it won't be called. And if you try to parse an XML
116 * file where the parser can find the DTD, it won't be called either."
117 *
118 */
119 public InputSource resolveEntity (String public_id, String system_id) {
120
121 DebugStream.println("### resolveEntity() called for " + system_id);
122
123 String temp_id = system_id;
124 if (temp_id.startsWith("file://")) {
125 File f = new File(system_id);
126 if (f.exists()) {
127
128 // check if inside GS2 or GS3 installation
129 if(f.getAbsolutePath().startsWith(Configuration.gsdl_path)
130 || f.getAbsolutePath().startsWith(Configuration.gsdl3_src_path)) {
131
132 DebugStream.println("### file denoted by systemID is inside GS: " + f.getAbsolutePath());
133
134 return new InputSource(system_id); // problem solved
135 } else {
136 DebugStream.println("\t### file denoted by systemID exists, but not located inside GS: "
137 + f.getAbsolutePath());
138 }
139 } else {
140
141 temp_id = f.getName();
142
143 //check in list of search paths
144 Iterator<File> i = list_of_local_search_paths.iterator();
145 while(i.hasNext()) {
146 File searchPath = i.next();
147 DebugStream.println("### searching for entity '" + temp_id + "' in: "
148 + searchPath.getAbsolutePath());
149
150 File searchFile = new File(searchPath, temp_id);
151
152 if(searchFile.exists()) {
153 DebugStream.println("\t Found " + searchFile.getAbsolutePath());
154
155 String newpath = searchFile.getAbsolutePath();
156 if(Utility.isWindows()) {
157 newpath = "file:///" + newpath.replace("\\", "/");
158 } else { // linux version, file protocol starts with file:// and slashes are already URL-style
159 newpath = "file://" + newpath;
160 }
161 return new InputSource(newpath);
162
163 }
164 }
165
166 // else, external entity/file denoted by systemid is not in list of search paths, try classloader
167
168 }
169 } else {
170
171 DebugStream.println("### Entity is not a file: " + system_id);
172
173 if (temp_id.indexOf("/")!= -1) {
174 temp_id = temp_id.substring(temp_id.lastIndexOf("/")+1);
175 }
176 }
177
178 DebugStream.println("### Using classloader to attempt to resolve entity: " + temp_id);
179
180 // try using a class loader. If none provided, use current class loader
181 if (this.class_loader==null) {
182 this.class_loader = this.getClass().getClassLoader();
183 }
184 URL url = class_loader.getResource(temp_id);
185 if (url == null) {
186 return null;
187 }
188
189 return new InputSource("file://"+url.getFile());
190 }
191}
Note: See TracBrowser for help on using the repository browser.