source: main/trunk/gli/src/org/greenstone/gatherer/util/GLIEntityResolver.java@ 29730

Last change on this file since 29730 was 29730, checked in by ak19, 9 years ago

The second and final part of the commits to get GLI running again and parsing web.xml, following the changes in commit r29687, where web.xml was split into two and made to include servlets.xml. In this commit: 1. GLI uses an EntityResolver to resolve entities in web.xml that are defined in the included servlets.xml file. In order to keep XMLTools.java tidy and hopefully make the GLI entity resolver more reusable, the new GLIEntityResolver.java class checks default search paths first when asked to resolve entities. web/WEB-INF, where web.xml and servlets.xml live, has been added to the default search paths, as has the gli user dir, where web.xml and servlets.xml will be in a client-GLI situation. 2. Small tidy-up to the Greenstone runtime's GSEntityResolver. 3. The remote Greenstone gliserver.pl also needs to transfer the new servlets.xml file when zipping up web.xml. 4. Minor touch-ups to the new README on apache.jar.

File size: 6.4 KB
Line 
1/*
2 * GLIEntityResolver.java
3 * Copyright (C) 2008 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20package org.greenstone.gatherer.util;
21
22import org.greenstone.gatherer.Configuration;
23import org.greenstone.gatherer.DebugStream;
24import org.greenstone.gatherer.Gatherer;
25
26import org.xml.sax.InputSource;
27import org.xml.sax.EntityResolver;
28import java.io.File;
29import java.net.URL;
30import java.util.ArrayList;
31import java.util.Iterator;
32
33
34/**
35 * Uses a list of default search paths, or the file's own path if provided, to resolve entities referenced
36 * in the file. The search paths, including the file's own, have to be within GS.
37 * By default, the GS3HOME/WEB-INF and toplevel GLI user directory are added to the list of search paths,
38 * since this EntityResolver is used for web.xml to resolve its inclusion of servlets.xml, which can be
39 * located in GS3HOME/WEB-INF for a local GLI or is extracted into the gli user directory for client-GLI.
40 * If everything fails, it will try to resolve entities using the classloader, not otherwise used by GLI,
41 * but this part has been copied over from GS3's GLIEntityResolver.java.
42 * If ever needed, maybe to make things faster, can maintain a static list of default search paths.
43 */
44public class GLIEntityResolver implements EntityResolver {
45
46 protected ArrayList<File> list_of_local_search_paths = new ArrayList<File>();
47 ClassLoader class_loader = null;
48
49
50 public GLIEntityResolver() {
51 // Add the basic search paths:
52
53 // add the GS3 web/WEB-INF folder as search location for servlets.xml, which web.xml
54 // includes and which is the reason for adding a GLIEntityResolver class into GLI
55
56 if(Gatherer.GS3) {
57 //list_of_local_search_paths.add(new File(Configuration.gsdl3_path));
58 list_of_local_search_paths.add(new File(Configuration.gsdl3_path, "WEB-INF"));
59 }
60
61 // Add gli user dir for remote GS, since that is where the
62 // web.xml and server.xml downloaded from the remote server will be unpacked to
63 if(Gatherer.isGsdlRemote) {
64 list_of_local_search_paths.add(new File(Configuration.gli_user_directory_path));
65 }
66 }
67
68 public GLIEntityResolver(File file) {
69 this(); // adds default search paths
70
71 // add file's own path as first in search list
72 addSafeSearchPath(file, true);
73 }
74
75 /* Methods with the ClassLoader parameter are unused at present */
76 public GLIEntityResolver(ClassLoader loader) {
77 this.class_loader = loader;
78 }
79
80 public GLIEntityResolver(File f, ClassLoader loader) {
81 this(f);
82 this.class_loader = loader;
83 }
84
85 public void setClassLoader(ClassLoader loader) {
86 this.class_loader = loader;
87 }
88
89 public void addSafeSearchPath(File file, boolean prepend) {
90
91 // add the file's directory to list of search paths,
92 // if it is within the greenstone installation and not already in the list
93 if(file.getAbsolutePath().startsWith(Configuration.gsdl3_src_path)) {
94 File path = file.getParentFile();
95 if(!list_of_local_search_paths.contains(path)) {
96 if(prepend) {
97 list_of_local_search_paths.add(0, path);
98 } else {
99 list_of_local_search_paths.add(path);
100 }
101 }
102 } else {
103 DebugStream.println("### Location of file " + file + " not within GS. Not adding to list of search paths.");
104 }
105 }
106
107 /**
108 * resolveEntity() is not called for every file:
109 * http://www.postseek.com/meta/37735b65e6a459a6aa631f048cc5a0b6
110 *
111 * "I think the parser will call [resolveEntity() on an EntityResolver that has been set], if
112 * [the parser] is unable to find the DTD that your XML file refers to. So if you try to parse
113 * an XML file that doesn't refer to a DTD, it won't be called. And if you try to parse an XML
114 * file where the parser can find the DTD, it won't be called either."
115 *
116 */
117 public InputSource resolveEntity (String public_id, String system_id) {
118
119 DebugStream.println("### resolveEntity() called for " + system_id);
120
121 String temp_id = system_id;
122 if (temp_id.startsWith("file://")) {
123 File f = new File(system_id);
124 if (f.exists()) {
125
126 // check if inside GS2 or GS3 installation
127 if(f.getAbsolutePath().startsWith(Configuration.gsdl_path)
128 || f.getAbsolutePath().startsWith(Configuration.gsdl3_src_path)) {
129
130 DebugStream.println("### file denoted by systemID is inside GS: " + f.getAbsolutePath());
131
132 return new InputSource(system_id); // problem solved
133 } else {
134 DebugStream.println("\t### file denoted by systemID exists, but not located inside GS: "
135 + f.getAbsolutePath());
136 }
137 } else {
138
139 temp_id = f.getName();
140
141 //check in list of search paths
142 Iterator<File> i = list_of_local_search_paths.iterator();
143 while(i.hasNext()) {
144 File searchPath = i.next();
145 DebugStream.println("### searching for entity '" + temp_id + "' in: "
146 + searchPath.getAbsolutePath());
147
148 File searchFile = new File(searchPath, temp_id);
149
150 if(searchFile.exists()) {
151 DebugStream.println("\t Found " + searchFile.getAbsolutePath());
152 return new InputSource("file://" + searchFile.getAbsolutePath());
153 }
154 }
155
156 // else, external entity/file denoted by systemid is not in list of search paths, try classloader
157
158 }
159 } else {
160
161 DebugStream.println("### Entity is not a file: " + system_id);
162
163 if (temp_id.indexOf("/")!= -1) {
164 temp_id = temp_id.substring(temp_id.lastIndexOf("/")+1);
165 }
166 }
167
168 DebugStream.println("### Using classloader to attempt to resolve entity: " + temp_id);
169
170 // try using a class loader. If none provided, use current class loader
171 if (this.class_loader==null) {
172 this.class_loader = this.getClass().getClassLoader();
173 }
174 URL url = class_loader.getResource(temp_id);
175 if (url == null) {
176 return null;
177 }
178
179 return new InputSource("file://"+url.getFile());
180 }
181}
Note: See TracBrowser for help on using the repository browser.