1 | /**
|
---|
2 | *#########################################################################
|
---|
3 | * FedoraGS3DL.java - works with the demo-client for Greenstone 3, of the
|
---|
4 | * Greenstone digital library suite from the New Zealand Digital Library
|
---|
5 | * Project at the University of Waikato, New Zealand.
|
---|
6 | * <BR><BR>
|
---|
7 | * Copyright (C) 2008 New Zealand Digital Library Project
|
---|
8 | * <BR><BR>
|
---|
9 | * This program is free software; you can redistribute it and/or modify
|
---|
10 | * it under the terms of the GNU General Public License as published by
|
---|
11 | * the Free Software Foundation; either version 2 of the License, or
|
---|
12 | * (at your option) any later version.
|
---|
13 | * <BR><BR>
|
---|
14 | * This program is distributed in the hope that it will be useful,
|
---|
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
17 | * GNU General Public License for more details.
|
---|
18 | *########################################################################
|
---|
19 | */
|
---|
20 |
|
---|
21 | package org.greenstone.fedora.services;
|
---|
22 |
|
---|
23 |
|
---|
24 | import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
|
---|
25 | import org.w3c.dom.Element;
|
---|
26 | import org.xml.sax.SAXException;
|
---|
27 |
|
---|
28 | import java.io.IOException;
|
---|
29 | import java.io.UnsupportedEncodingException;
|
---|
30 | import java.rmi.RemoteException;
|
---|
31 |
|
---|
32 | import javax.xml.transform.TransformerException;
|
---|
33 | //import javax.xml.transform.TransformerConfigurationException;
|
---|
34 |
|
---|
35 | /**
|
---|
36 | * Defines the methods that must be provided to retrieve the datastreams
|
---|
37 | * specific to Greenstone documents stored in a Fedora repository.
|
---|
38 | * @author ak19
|
---|
39 | */
|
---|
40 | public interface FedoraGS3DL {
|
---|
41 | /* CONSTANTS (FOLLOWED BY METHOD DECLARATIONS) */
|
---|
42 |
|
---|
43 | /** Instead of message router, we indicate that request messages
|
---|
44 | * sent here come from FedoraGS3 */
|
---|
45 | public static final String FEDORA_GS3 = "FedoraGS3";
|
---|
46 |
|
---|
47 | // Some constant string literals we'll be dealing with
|
---|
48 |
|
---|
49 | /** "greenstone" is the FEDORA PID prefix we'll be working with
|
---|
50 | * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
|
---|
51 | */
|
---|
52 | public static final String GREENSTONE = "greenstone";
|
---|
53 | public static final String GREENSTONE_ = GREENSTONE+":";
|
---|
54 | public static final String COLLECTION = "collection";
|
---|
55 | public static final String _COLLECTION = "-"+COLLECTION;
|
---|
56 |
|
---|
57 | public static final String WILDCARD = "*";
|
---|
58 | public static final String HYPHEN = "-";
|
---|
59 | public static final String COLON = ":";
|
---|
60 | public static final String PERIOD = ".";
|
---|
61 |
|
---|
62 | // public static final String UTF16 = "UTF-16"; // not used
|
---|
63 | public static final String UTF8 = "UTF8";
|
---|
64 |
|
---|
65 | // attribute names
|
---|
66 | public static final String TITLE = "Title";
|
---|
67 | public static final String ID = "id";
|
---|
68 | public static final String NAME = "name";
|
---|
69 | public static final String COLLECTIONNAME = "collectionname";
|
---|
70 | public static final String QUALIFIER = "qualifier";
|
---|
71 | public static final String ENGLISH = "en";
|
---|
72 |
|
---|
73 | // tag names
|
---|
74 | public static final String METADATA = "metadata";
|
---|
75 | public static final String SECTION_ELEMENT = "Section";
|
---|
76 |
|
---|
77 | /** EX marks the XML metadata file that contains Greenstone extracted
|
---|
78 | * metadata */
|
---|
79 | public static final String EX = "EX";
|
---|
80 | /** Fedora's Dublin Core metadata */
|
---|
81 | public static final String DC = "DC";
|
---|
82 | /** DLS metadata of Greenstone documents - this metadata set is optionally
|
---|
83 | * provided for top level documents. Not all Greenstone top-level documents
|
---|
84 | * in the Fedora repository may have associated DLS metadata, however.*/
|
---|
85 | public static final String DLS = "DLS";
|
---|
86 | /** Table of contents for a Greenstone-Fedora document which outlines the
|
---|
87 | * structure of the document. */
|
---|
88 | public static final String TOC = "TOC";
|
---|
89 | /** The SECTION prefix in the name of a section's datastream;
|
---|
90 | * eg. SECTION1.2.2 */
|
---|
91 | public static final String SECTION = "SECTION";
|
---|
92 |
|
---|
93 |
|
---|
94 | /* METHOD DEFINITIONS */
|
---|
95 | /** @return the default language used to query for titles (and anything else
|
---|
96 | * where there are multiple language options). Upon initialisation, this
|
---|
97 | * defaults to English. */
|
---|
98 | public String getLanguage();
|
---|
99 |
|
---|
100 | /** Sets the the default language used to query for titles (and anything else
|
---|
101 | * where there are multiple language options). If the default language for any
|
---|
102 | * query is not available, then English ("en") is used. If that's not available
|
---|
103 | * then the first other available language is used.
|
---|
104 | * @param lang - the two-letter language code to set the default language to. */
|
---|
105 | public void setLanguage(String lang);
|
---|
106 |
|
---|
107 | /** The default maximum number of search results returned for a search. Upon
|
---|
108 | * initialisation, this defaults to Java's Integer.MAX_VALUE. */
|
---|
109 | public int getMaxResults();
|
---|
110 |
|
---|
111 | /** Set the default maximum number of search results returned for a search.
|
---|
112 | * @param maxresults - the new default maximum number of search results to
|
---|
113 | * be returned. */
|
---|
114 | public void setMaxResults(int maxresults);
|
---|
115 |
|
---|
116 | /** @return fedora's baseURL */
|
---|
117 | public String getBaseURL();
|
---|
118 |
|
---|
119 | /** @return the portAddressURL of the Fedora APIA web service
|
---|
120 | * (should be the endpoint location in the APIA's WSDL file).
|
---|
121 | * Else set this in the .properties file to something else. */
|
---|
122 | public String getPortAddressURL();
|
---|
123 |
|
---|
124 | /** @return the baseURL for gsdlAssocFiles */
|
---|
125 | public String getAssocFileBaseURL();
|
---|
126 |
|
---|
127 | /* GET COLLECTIONS, DOCUMENTS, SECTIONS, AND TITLES */
|
---|
128 |
|
---|
129 | /** Gets all greenstone collections. Searches for greenstone:*-collection.
|
---|
130 | * @return an array of Strings containing the pids of all collections
|
---|
131 | * matching the format greenstone:*-collection. */
|
---|
132 | public String[] getCollections() throws RemoteException;
|
---|
133 |
|
---|
134 | /** @return the <name>s (in greenstone:<name>-collection) for the collections
|
---|
135 | * indicated by collPIDs.
|
---|
136 | * @param collPIDs - an array of Strings denoting the pids for greenstone
|
---|
137 | * collections stored in the fedora repositoryl. These should be of the
|
---|
138 | * format "greenstone:<collectionName>-collection". */
|
---|
139 | public String[] getCollectionNames(String[] collPIDs);
|
---|
140 |
|
---|
141 | /** @return "greenstone:<name>-collection" for all <name>s in the
|
---|
142 | * parameter collNames.
|
---|
143 | * @param collNames - a list of names of greenstone collections
|
---|
144 | * stored in the fedora repository. */
|
---|
145 | public String[] getCollectionPIDs(String[] collNames);
|
---|
146 |
|
---|
147 | /** @return greenstone:<name>-collection for the <name> denoted by
|
---|
148 | * parameter collName.
|
---|
149 | * @param collName - the name of a greenstone collection stored
|
---|
150 | * stored in the fedora repository. */
|
---|
151 | public String getCollectionPID(String collName);
|
---|
152 |
|
---|
153 | /**
|
---|
154 | * Gets the title of the collection denoted by the given collection's pid by
|
---|
155 | * retrieving the title metadata for it from the collection's EX datastream.
|
---|
156 | * @return the title (in the default language, else English, else the
|
---|
157 | * first title found) for the particular collection denoted by its PID.
|
---|
158 | * @param collPID is the pid of a greenstone collection in the fedora
|
---|
159 | * repository. */
|
---|
160 | public String getCollectionTitle(String collPID)
|
---|
161 | throws RemoteException, UnsupportedEncodingException,
|
---|
162 | SAXException, IOException;
|
---|
163 |
|
---|
164 | /** @return the collection titles for all the collections indicated by
|
---|
165 | * collPIDs.
|
---|
166 | * @param collPIDs - a list of pids identifying greenstone collections
|
---|
167 | * stored in the fedora repository. */
|
---|
168 | public String[] getCollectionTitles(String[] collPIDs)
|
---|
169 | throws RemoteException, UnsupportedEncodingException,
|
---|
170 | SAXException, IOException;
|
---|
171 |
|
---|
172 | /** @return the title metadata for the given doc objects of a collection.
|
---|
173 | * These titles are returned in the same order as the given docIDs.
|
---|
174 | * @param docPIDs - a list of pids identifying documents stored in the
|
---|
175 | * fedora repository. */
|
---|
176 | public String[] getDocTitles(String[] docPIDs)
|
---|
177 | throws RemoteException, UnsupportedEncodingException,
|
---|
178 | SAXException, IOException;
|
---|
179 |
|
---|
180 | /** Gets the title metadata for a particular doc object in a collection
|
---|
181 | * denoted by docPID. The docPID already contains the collection name.
|
---|
182 | * @return the title for the fedora document item denoted by docPID
|
---|
183 | * @param docPID is the pid of the document in the fedora repository
|
---|
184 | * (docPID is of the form greenstone:<colName>-<doc-identifier> */
|
---|
185 | public String getDocTitle(String docPID)
|
---|
186 | throws RemoteException, UnsupportedEncodingException,
|
---|
187 | SAXException, IOException;
|
---|
188 |
|
---|
189 | /** @return the title metadata for the given document sections.
|
---|
190 | * These titles are returned in the same order as the given docPIDs
|
---|
191 | * and associated sectionIDs.
|
---|
192 | * @param docPIDs - a list of pids identifying documents stored in the
|
---|
193 | * fedora repository.
|
---|
194 | * @param sectionIDs - a list of sectionIDs identifying individual sections
|
---|
195 | * of documents stored in the fedora repository whose titles are requested. */
|
---|
196 | public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
|
---|
197 | throws RemoteException, UnsupportedEncodingException,
|
---|
198 | SAXException, IOException;
|
---|
199 |
|
---|
200 | /** @return the title metadata for the given document section.
|
---|
201 | * (The docPID already contain the collection name anyway.)
|
---|
202 | * @param docPID - a pid identifying a document in the fedora repository.
|
---|
203 | * @param sectionID - the sectionID of the section of the
|
---|
204 | * document whose title is requested. */
|
---|
205 | public String getSectionTitle(String docPID, String sectionID)
|
---|
206 | throws UnsupportedEncodingException, RemoteException,
|
---|
207 | SAXException, IOException;
|
---|
208 |
|
---|
209 | /** @return a list of the fedora pids of all (document) objects in the
|
---|
210 | * given greenstone collection stored in fedora's repository. All
|
---|
211 | * pids that do not map to a collection are assumed to be documents.
|
---|
212 | * @param colPID is the pid of the greenstone collection stored in
|
---|
213 | * the fedora repository. */
|
---|
214 | public String[] getCollectionDocs(String colPID) throws RemoteException;
|
---|
215 |
|
---|
216 | /** Given the pid of a document fedora data object, this method will return
|
---|
217 | * all itemIDs that are part of that data object and are Sections.
|
---|
218 | * @return an array of itemIDs of the Sections of the document,
|
---|
219 | * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
|
---|
220 | * @param docPID is a fedora pid identifying a greenstone document object. */
|
---|
221 | public String[] getSectionNames(String docPID) throws RemoteException;
|
---|
222 |
|
---|
223 | /** Given the pid of a document fedora data object, this method will return all
|
---|
224 | * itemIDs that are part of that data object and are Sections, but just the
|
---|
225 | * Section numbers are returned.
|
---|
226 | * @return an array of itemIDs of the Section numbers of the document
|
---|
227 | * indicated by docPID, in ascending order. Return values are of form: "1.*".
|
---|
228 | * @param docPID is a fedora pid identifying a greenstone document object. */
|
---|
229 | public String[] getSectionNumbers(String docPID) throws RemoteException;
|
---|
230 |
|
---|
231 | /** @return the titles for the document sections denoted by the parameters.
|
---|
232 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
233 | * @param sectionIDs is a list of identifiers identifying sections in the
|
---|
234 | * document denoted by docPID, whose titles need to be returned. Each
|
---|
235 | * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
|
---|
236 | * or a section number (eg. 1.5.1). */
|
---|
237 | public String[] getTitles(String docPID, String[] sectionIDs)
|
---|
238 | throws RemoteException, UnsupportedEncodingException,
|
---|
239 | SAXException, IOException;
|
---|
240 |
|
---|
241 | /** @return the title for the document section denoted by the parameters.
|
---|
242 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
243 | * @param sectionID identifies the particular section in the
|
---|
244 | * document denoted by docPID, whose title needs to be returned. The
|
---|
245 | * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
|
---|
246 | * or a section number (eg. 1.5.1). */
|
---|
247 | public String getTitle(String docPID, String sectionID)
|
---|
248 | throws RemoteException, UnsupportedEncodingException,
|
---|
249 | SAXException, IOException;
|
---|
250 |
|
---|
251 | /** @return the <docName> in the parameter docPID (which is of the form:
|
---|
252 | * greenstone:<colname>-<docName>)
|
---|
253 | * @param docPID is the pid of a greenstone document in the fedora
|
---|
254 | * repository. */
|
---|
255 | public String getDocName(String docPID);
|
---|
256 |
|
---|
257 | /** @return the <name> in the parameter collPID
|
---|
258 | * (greenstone:<name>-collection)
|
---|
259 | * @param collPID is the pid of a greenstone collection in the fedora
|
---|
260 | * repository.
|
---|
261 | */
|
---|
262 | public String getCollectionName(String collPID);
|
---|
263 |
|
---|
264 | /* GETTING A DOCUMENT OR SECTION'S DATA STREAMS */
|
---|
265 |
|
---|
266 | /* All "greenstone:*" objects in fedora (be they collections or documents)
|
---|
267 | * have TOC, EX and DC datastreams. The following methods return the content
|
---|
268 | * (XML) of these datastreams as is. */
|
---|
269 | /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
|
---|
270 | * top-level documents or document sections) have an EX datastream. This method
|
---|
271 | * returns the content (XML) of the DC datastream as it is stored in fedora's
|
---|
272 | * repository.
|
---|
273 | * @return a String version of the XML in the DC datastream for the fedora object
|
---|
274 | * denoted by pid.
|
---|
275 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
276 | * repository. */
|
---|
277 | public String getDC(String pid) throws RemoteException, UnsupportedEncodingException;
|
---|
278 |
|
---|
279 | /** All "greenstone:*" objects in fedora (be they collections, top-level
|
---|
280 | * documents or document sections) have an EX datastream. This method
|
---|
281 | * returns the content (XML) of the EX datastream as is.
|
---|
282 | * @return a String version of the XML in the DC datastream for the fedora
|
---|
283 | * object denoted by pid.
|
---|
284 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
285 | * repository. */
|
---|
286 | public String getEX(String pid) throws RemoteException, UnsupportedEncodingException;
|
---|
287 |
|
---|
288 | /**
|
---|
289 | * Some "greenstone:*" top-level documents in the fedora repository (but not
|
---|
290 | * greenstone collections or document sections) have a DLS metadata datastream.
|
---|
291 | * This method returns the content (XML) of the DLS datastream as is.
|
---|
292 | * @return a String version of the XML in the DLS datastream for the fedora
|
---|
293 | * object denoted by pid.
|
---|
294 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
295 | * repository.
|
---|
296 | * */
|
---|
297 | public String getDLS(String pid) throws RemoteException, UnsupportedEncodingException;
|
---|
298 |
|
---|
299 | /** All "greenstone:*" objects in fedora (be they collections or documents)
|
---|
300 | * have a TOC datastream. This method returns the content (XML) of the TOC
|
---|
301 | * datastream as is. (Calls default fedora-system 3 dissemination <pid>/TOC.)
|
---|
302 | * @return a String version of the XML in the DC datastream for the fedora
|
---|
303 | * object denoted by pid.
|
---|
304 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
305 | * repository.
|
---|
306 | */
|
---|
307 | public String getTOC(String pid) throws RemoteException, UnsupportedEncodingException;
|
---|
308 |
|
---|
309 | /** @return the section's XML (as a String) as it is stored in fedora.
|
---|
310 | * Works out if sectionID is a sectionName or sectionNumber.
|
---|
311 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
312 | * @param sectionID identifies the particular section in the
|
---|
313 | * document denoted by docPID, may be a section name or number. */
|
---|
314 | public String getSection(String docPID, String sectionID)
|
---|
315 | throws RemoteException, UnsupportedEncodingException;
|
---|
316 |
|
---|
317 | /** @return the required section's DC metadata XML datastream.
|
---|
318 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
319 | * @param sectionID identifies the particular section in the
|
---|
320 | * document denoted by docPID, may be a section name or number. */
|
---|
321 | public String getSectionDCMetadata(String docPID, String sectionID)
|
---|
322 | throws RemoteException, UnsupportedEncodingException;
|
---|
323 |
|
---|
324 | /** @return the required section's EX metadata XML datastream.
|
---|
325 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
326 | * @param sectionID identifies the particular section in the
|
---|
327 | * document denoted by docPID, may be a section name or number. */
|
---|
328 | public String getSectionEXMetadata(String docPID, String sectionID)
|
---|
329 | throws RemoteException, UnsupportedEncodingException;
|
---|
330 |
|
---|
331 | /* METHODS FOR GETTING THE STRUCTURE OF DOCUMENTS */
|
---|
332 |
|
---|
333 | /** @return the XML content of the TOC of just that portion of the TOC which
|
---|
334 | * contains the section denoted by sectionID and its direct child subsections.
|
---|
335 | * The children are returned in the order they are encountered, which
|
---|
336 | * happens to be in the required order of ascending sectionID.
|
---|
337 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
338 | * @param sectionID identifies the particular section in the
|
---|
339 | * document denoted by docPID, may be a section name or number. */
|
---|
340 | public Element getChildrenOfSectionXML(String docPID, String sectionID)
|
---|
341 | throws RemoteException, UnsupportedEncodingException,
|
---|
342 | SAXException, IOException;
|
---|
343 |
|
---|
344 | /** @return a string representing the XML content of the TOC of just
|
---|
345 | * that portion of the TOC which contains the section denoted by sectionID
|
---|
346 | * and its direct child subsections.
|
---|
347 | * The children are returned in the order they are encountered, which
|
---|
348 | * happens to be in the required order of ascending sectionID.
|
---|
349 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
350 | * @param sectionID identifies the particular section in the
|
---|
351 | * document denoted by docPID, may be a section name or number. */
|
---|
352 | public String getChildrenOfSection(String docPID, String sectionID)
|
---|
353 | throws RemoteException, UnsupportedEncodingException,
|
---|
354 | SAXException, IOException, TransformerException;
|
---|
355 |
|
---|
356 | /** @return the part of the TOC XML file (which outlines doc structure)
|
---|
357 | * relating to the given section. This includes the section denoted by
|
---|
358 | * sectionID as well as all descendent subsections thereof.
|
---|
359 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
360 | * @param sectionID identifies the particular section in the
|
---|
361 | * document denoted by docPID, may be a section name or number. */
|
---|
362 | public Element getSubsectionXML(String docPID, String sectionID)
|
---|
363 | throws RemoteException, UnsupportedEncodingException,
|
---|
364 | SAXException, IOException;
|
---|
365 |
|
---|
366 | /** @return a String representation of the part of the TOC XML file
|
---|
367 | * (which outlines doc structure) relating to the given section. This
|
---|
368 | * includes the section denoted by sectionID as well as all descendent
|
---|
369 | * subsections thereof.
|
---|
370 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
371 | * @param sectionID identifies the particular section in the
|
---|
372 | * document denoted by docPID, may be a section name or number. */
|
---|
373 | public String getSubsection(String docPID, String sectionID)
|
---|
374 | throws RemoteException, UnsupportedEncodingException, SAXException,
|
---|
375 | IOException, TransformerException;
|
---|
376 |
|
---|
377 | /* BROWSING */
|
---|
378 |
|
---|
379 | /** Allows browsing document titles of a greenstone collection stored in
|
---|
380 | * the fedora repository by letter.
|
---|
381 | * @return the browse results for documents that start with any letter from
|
---|
382 | * A to Z. Returns the document pids whose titles start with the given letter.
|
---|
383 | * @param letter is the starting letter to browse by. */
|
---|
384 | public String[] browseTitlesByLetter(String collName, String letter)
|
---|
385 | throws RemoteException, FedoraVersionNotSupportedException;
|
---|
386 |
|
---|
387 | /** Allows querying document titles of a greenstone collection stored in
|
---|
388 | * the fedora repository for a term that may occur anywhere in their titles.
|
---|
389 | * @return the document pids whose titles contain the parameter term.
|
---|
390 | * @param titleContents is the word or phrase to search the collection's
|
---|
391 | * document titles for.
|
---|
392 | * @param startsWith - if true, searches for titles that start with
|
---|
393 | * titleContents. Else it searches for titles that contain titleContents. */
|
---|
394 | public String[] searchDocumentTitles(String collName, String titleContents,
|
---|
395 | boolean startsWith)
|
---|
396 | throws RemoteException, FedoraVersionNotSupportedException;
|
---|
397 |
|
---|
398 | /* The following single method can be used instead.
|
---|
399 | * ComparisonOperator can be contains or startswith. */
|
---|
400 | /* public String[] browse(String term, ComparisonOperator?); */
|
---|
401 | } |
---|