source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/FedoraServiceProxy.java

Last change on this file was 33768, checked in by kjdon, 4 years ago

removed some code that was commented out, and some methods that were identical to the base class methods

File size: 30.0 KB
Line 
1/*
2 * FedoraServiceProxy.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// greenstone classes
22import java.io.StringReader;
23import java.util.HashMap;
24import java.util.Properties;
25import java.util.regex.Matcher;
26import java.util.regex.Pattern;
27
28import javax.xml.parsers.DocumentBuilder;
29import javax.xml.parsers.DocumentBuilderFactory;
30
31import org.apache.log4j.Logger;
32import org.greenstone.gs3client.dlservices.DigitalLibraryServicesAPIA;
33import org.greenstone.gs3client.dlservices.FedoraServicesAPIA;
34import org.greenstone.gsdl3.core.MessageRouter;
35import org.greenstone.gsdl3.util.Dictionary;
36import org.greenstone.gsdl3.util.GSPath;
37import org.greenstone.gsdl3.util.GSXML;
38import org.greenstone.gsdl3.util.MacroResolver;
39import org.greenstone.gsdl3.util.OID;
40import org.greenstone.gsdl3.util.XMLConverter;
41import org.greenstone.util.ProtocolPortProperties;
42import org.w3c.dom.Document;
43import org.w3c.dom.Element;
44import org.w3c.dom.Node;
45import org.w3c.dom.NodeList;
46import org.xml.sax.InputSource;
47
48import org.apache.commons.lang3.StringUtils;
49
50/*
51// greenstone classes
52import org.greenstone.gsdl3.util.*;
53import org.greenstone.gsdl3.core.*;
54
55// for fedora
56import org.greenstone.gs3client.dlservices.*;
57import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
58
59// xml classes
60import org.w3c.dom.Node;
61import org.w3c.dom.NodeList;
62import org.w3c.dom.Element;
63import org.w3c.dom.Document;
64import org.xml.sax.InputSource;
65import javax.xml.parsers.*;
66import org.apache.xpath.XPathAPI;
67
68// general java classes
69import java.io.Reader;
70import java.io.StringReader;
71import java.io.File;
72import java.util.HashMap;
73import java.util.Locale;
74import java.util.Properties;
75import java.util.ResourceBundle;
76import java.util.regex.*;
77import java.lang.reflect.Method;
78*/
79
80import org.apache.log4j.*;
81
82/**
83 * FedoraServiceProxy - communicates with the FedoraGS3 interface.
84 *
85 * @author Anupama Krishnan
86 */
87public class FedoraServiceProxy
88 extends ServiceRack implements OID.OIDTranslatable
89{
90
91 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.FedoraServiceProxy.class.getName());
92 protected MacroResolver macro_resolver = null;
93
94
95 /** The handle to the fedora connection */
96 private DigitalLibraryServicesAPIA fedoraServicesAPIA;
97
98 private String prevLanguage = "";
99
100 public void cleanUp() {
101 super.cleanUp();
102 }
103
104 /** sets the message router */
105 public void setMessageRouter(MessageRouter m) {
106 this.router = m;
107 setLibraryName(m.getLibraryName());
108 }
109
/**
 * The no-args constructor. Besides the base class initialisation it creates
 * the text macro resolver that is later used on retrieved document content.
 */
public FedoraServiceProxy() {
    super();
    macro_resolver = new BasicTextMacroResolver();
}
116
117
118 /* configure the service module
119 *
120 * @param info the XML node <serviceRack name="XXX"/> with name equal
121 * to the class name (of the subclass)
122 *
123 * must configure short_service_info_ and service_info_map_
124 * @return true if configured ok
125 * must be implemented in subclasses
126 */
127 /*public boolean configure(Element info) {
128 return configure(info, null);
129 }*/
130
131 public boolean configure(Element info, Element extra_info) {
132 // set up the class loader
133
134 if (!super.configure(info, extra_info)){
135 return false;
136 }
137
138 // Try to instantiate a Fedora dl handle
139 try {
140 // Fedora connection settings defaults.
141 // Read host and port from global.properties, since by default, we expect the Greenstone server to be used
142 Properties globalProperties = new Properties();
143 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
144
145 ProtocolPortProperties protocolPortProps = new ProtocolPortProperties(globalProperties); // can throw Exception
146 String host = globalProperties.getProperty("tomcat.server", "localhost");
147 String protocol = protocolPortProps.getProtocol();
148 String port = protocolPortProps.getPort();
149
150 String username = "fedoraIntCallUser"; //"fedoraAdmin"
151 String password = "changeme"; //"<user password>"
152
153 // See if buildConfig.xml overrides any of the defaults
154 // info is the <serviceRack> Element from buildConfig.xml (extra_info are the Elements of collectionConfig.xml)
155
156 NodeList nodes = info.getElementsByTagName("fedoraConnection");
157 if(nodes != null && nodes.getLength() > 0) {
158
159 Element fedoraElement = (Element)nodes.item(0);
160 if(fedoraElement.hasAttribute("protocol")) {
161 protocol = fedoraElement.getAttribute("protocol");
162 }
163 if(fedoraElement.hasAttribute("host")) {
164 host = fedoraElement.getAttribute("host");
165 }
166 if(fedoraElement.hasAttribute("port")) {
167 port = fedoraElement.getAttribute("port");
168 }
169 if(fedoraElement.hasAttribute("username")) {
170 username = fedoraElement.getAttribute("username");
171 }
172 if(fedoraElement.hasAttribute("password")) {
173 password = fedoraElement.getAttribute("password");
174 }
175 }
176
177 fedoraServicesAPIA = new FedoraServicesAPIA(protocol, host, Integer.parseInt(port), username, password);
178
179 } catch(org.greenstone.fedora.services.FedoraGS3Exception.CancelledException e) {
180 // The user pressed cancel in the fedora services instantiation dialog
181 return false;
182 } catch(Exception e) {
183 logger.error("Error instantiating the interface to the Fedora Repository: " + e.getMessage() + "\n", e); // second parameter prints e's stacktrace
184 return false; // configure has failed
185 }
186
187
188 // Need to put the available services into short_service_info
189 // This is used by DefaultReceptionist.process() has an exception. But DefaultReceptionist.addExtraInfo()
190 // isn't helpful, and the problem actually already occurs in
191 // Receptionist.process() -> PageAction.process() -> MessageRouter.process()
192 // -> Collection/ServiceCluster.process() -> ServiceCluster.configureServiceRackList()
193 // -> ServiceRack.process() -> ServiceRack.processDescribe() -> ServiceRack.getServiceList().
194 // ServiceRack.getServiceList() requires this ServiceRack's services to be filled into the
195 // short_service_info Element which needs to be done in this FedoraServiceProxy.configure().
196
197 // get the display and format elements from the coll config file for
198 // the classifiers
199 AbstractBrowse.extractExtraClassifierInfo(info, extra_info);
200
201 // Copied from IViaProxy.java:
202 String collection = fedoraServicesAPIA.describeCollection(this.cluster_name);
203
204 Element collNode = getResponseAsDOM(collection);
205 Element serviceList = (Element)collNode.getElementsByTagName(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER).item(0);
206
207//this.short_service_info.appendChild(short_service_info.getOwnerDocument().importNode(serviceList, true));
208 // we want the individual service Elements, not the serviceList Element which will wrap it later
209 NodeList services = collNode.getElementsByTagName(GSXML.SERVICE_ELEM);
210 for(int i = 0; i < services.getLength(); i++) {
211 Node service = services.item(i);
212 this.short_service_info.appendChild(short_service_info.getOwnerDocument().importNode(service, true));
213 }
214
215 // add some format info to service map if there is any
216 String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM);
217 Element search_format = (Element) GSXML.getNodeByPath(extra_info, path);
218 if (search_format != null) {
219 this.format_info_map.put("TextQuery", this.desc_doc.importNode(search_format, true));
220 this.format_info_map.put("FieldQuery", this.desc_doc.importNode(search_format, true));
221 }
222
223 // look for document display format
224 path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
225 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
226 if (display_format != null) {
227 this.format_info_map.put("DocumentContentRetrieve", this.desc_doc.importNode(display_format, true));
228 // should we make a copy?
229 }
230
231 // the format info
232 Element cb_format_info = this.desc_doc.createElement(GSXML.FORMAT_ELEM);
233 boolean format_found = false;
234
235 // look for classifier <browse><format>
236 path = GSPath.appendLink(GSXML.BROWSE_ELEM, GSXML.FORMAT_ELEM);
237 Element browse_format = (Element)GSXML.getNodeByPath(extra_info, path);
238 if (browse_format != null) {
239 cb_format_info.appendChild(GSXML.duplicateWithNewName(this.desc_doc, browse_format, GSXML.DEFAULT_ELEM, true));
240 format_found = true;
241 }
242
243 // add in to the description a simplified list of classifiers
244 Element browse = (Element)GSXML.getChildByTagName(extra_info, "browse"); // the <browse>
245 NodeList classifiers = browse.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
246 for(int i=0; i<classifiers.getLength(); i++) {
247 Element cl = (Element)classifiers.item(i);
248 Element new_cl = (Element)this.desc_doc.importNode(cl, false); // just import this node, not the children
249
250 // get the format info out, and put inside a classifier element
251 Element format_cl = (Element)new_cl.cloneNode(false);
252 Element format = (Element)GSXML.getChildByTagName(cl, GSXML.FORMAT_ELEM);
253 if (format != null) {
254
255 //copy all the children
256 NodeList elems = format.getChildNodes();
257 for (int j=0; j<elems.getLength();j++) {
258 format_cl.appendChild(this.desc_doc.importNode(elems.item(j), true));
259 }
260 cb_format_info.appendChild(format_cl);
261 format_found = true;
262 }
263
264 }
265
266 if (format_found) {
267 this.format_info_map.put("ClassifierBrowse", cb_format_info);
268 }
269
270
271 // set up the macro resolver
272 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
273 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
274 if (replacement_elem != null) {
275 macro_resolver.addMacros(replacement_elem);
276 }
277 // look for any refs to global replace lists
278 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
279 for (int i=0; i<replace_refs_elems.getLength(); i++) {
280 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
281 if (!id.equals("")) {
282 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
283 if (replace_list != null) {
284 macro_resolver.addMacros(replace_list);
285 }
286 }
287 }
288
289 // configured ok
290 return true;
291 }
292
293
294 /* "DocumentContentRetrieve", "DocumentMetadataRetrieve", "DocumentStructureRetrieve",
295 "TextQuery", "FieldQuery", "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve" */
296
297 protected Element processDocumentContentRetrieve(Element request) {
298 String[] docIDs = parse(request, GSXML.DOC_NODE_ELEM, GSXML.NODE_ID_ATT);
299 String[] relLinks = parse(request, GSXML.DOC_NODE_ELEM, "externalURL");
300
301 //logger.error("### request:");
302 //logger.error(GSXML.elementToString(request, true));
303
304 if(docIDs == null) {
305 logger.error("DocumentContentRetrieve request specified no doc nodes.\n");
306 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
307 } else {
308 for(int i = 0; i < docIDs.length; i++) {
309 //logger.error("BEFORE: docIDs[" + i + "]: " + docIDs[i]);
310 if(relLinks[i] != null && docIDs[i].startsWith("http://")) { // need to do a look up
311 docIDs[i] = translateExternalId(docIDs[i]);
312 } else {
313 docIDs[i] = translateId(docIDs[i]);
314 }
315 //logger.error("AFTER: docIDs[" + i + "]: " + docIDs[i]);
316 }
317 }
318
319 String lang = request.getAttribute(GSXML.LANG_ATT);
320 if(!lang.equals(prevLanguage)) {
321 prevLanguage = lang;
322 fedoraServicesAPIA.setLanguage(lang);
323 }
324
325 // first param (the collection) is not used by Fedora
326 Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentContent(this.cluster_name, docIDs));
327
328
329 // resolve any collection specific macros
330 NodeList nodeContents = response.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
331 for(int i = 0; i < nodeContents.getLength(); i++) {
332 Element nodeContent = (Element)nodeContents.item(i);
333 /*if(nodeContent != null) {
334 nodeContent = (Element)nodeContent.getFirstChild(); // textNode
335 }*/
336 //logger.error("GIRAFFE 1. content retrieve response - nodeContent: " + GSXML.nodeToFormattedString(nodeContent));
337 String docContent = nodeContent.getFirstChild().getNodeValue(); // getTextNode and get its contents.
338 //logger.error("GIRAFFE 2. content retrieve response - docContent: " + docContent);
339
340 if(docContent != null) {
341 // get document text and resolve and macros. Rel and external links have _httpextlink_ set by HTMLPlugin
342 docContent = macro_resolver.resolve(docContent, lang, MacroResolver.SCOPE_TEXT, ""); // doc_id
343 nodeContent.getFirstChild().setNodeValue(docContent);
344 //logger.error("GIRAFFE 3. content retrieve response. Updated docContent: " + docContent);
345 }
346 }
347
348 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
349 }
350
351 protected Element processDocumentStructureRetrieve(Element request) {
352 String[] docIDs = parse(request, GSXML.DOC_NODE_ELEM, GSXML.NODE_ID_ATT);
353 String[] relLinks = parse(request, GSXML.DOC_NODE_ELEM, "externalURL");
354
355 if(docIDs == null) {
356 logger.error("DocumentStructureRetrieve request specified no doc nodes.\n");
357 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
358 } else {
359 for(int i = 0; i < docIDs.length; i++) {
360 //logger.error("BEFORE: docIDs[" + i + "]: " + docIDs[i]);
361 if(relLinks[i] != null && docIDs[i].startsWith("http://")) { // need to do a look up
362 docIDs[i] = translateExternalId(docIDs[i]);
363 } else {
364 docIDs[i] = translateId(docIDs[i]);
365 }
366 }
367 }
368
369 NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM);
370 String structure="";
371 String info="";
372 for(int i = 0; i < params.getLength(); i++) {
373 Element param = (Element)params.item(i);
374 if(param.getAttribute("name").equals("structure")) {
375 structure = structure + param.getAttribute("value") + "|";
376 } else if(param.getAttribute("name").equals("info")) {
377 info = info + param.getAttribute("value") + "|";
378 }
379 }
380
381 String lang = request.getAttribute(GSXML.LANG_ATT);
382 if(!lang.equals(prevLanguage)) {
383 prevLanguage = lang;
384 fedoraServicesAPIA.setLanguage(lang);
385 }
386 Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentStructure(
387 this.cluster_name, docIDs, new String[]{structure}, new String[]{info}));
388 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
389 }
390
391 protected Element processDocumentMetadataRetrieve(Element request) {
392 String[] docIDs = parse(request, GSXML.DOC_NODE_ELEM, GSXML.NODE_ID_ATT);
393 String[] relLinks = parse(request, GSXML.DOC_NODE_ELEM, "externalURL");
394
395 if(docIDs == null) {
396 logger.error("DocumentMetadataRetrieve request specified no doc nodes.\n");
397 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
398 } else {
399 for(int i = 0; i < docIDs.length; i++) {
400 //logger.error("**** relLinks[i]: " + relLinks[i]);
401 //logger.error("**** docIDs[i]: " + docIDs[i]);
402 if(relLinks[i] != null && docIDs[i].startsWith("http://")) { // need to do a look up
403 docIDs[i] = translateExternalId(docIDs[i]);
404 } else {
405 docIDs[i] = translateId(docIDs[i]);
406 }
407 //logger.error("AFTER: docIDs[" + i + "]: " + docIDs[i]);
408 }
409 }
410
411 NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM);
412 String[] metafields = {};
413 if(params.getLength() > 0) {
414 metafields = new String[params.getLength()];
415 for(int i = 0; i < metafields.length; i++) {
416 Element param = (Element)params.item(i);
417 //if(param.hasAttribute(GSXML.NAME_ATT) && param.getAttribute(GSXML.NAME_ATT).equals("metadata") && param.hasAttribute(GSXML.VALUE_ATT)) {
418 if(param.hasAttribute(GSXML.VALUE_ATT)){
419 metafields[i] = param.getAttribute(GSXML.VALUE_ATT);
420 } else {
421 metafields[i] = "";
422 }
423 }
424 }
425
426 String lang = request.getAttribute(GSXML.LANG_ATT);
427 if(!lang.equals(prevLanguage)) {
428 prevLanguage = lang;
429 fedoraServicesAPIA.setLanguage(lang);
430 }
431 Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentMetadata(
432 this.cluster_name, docIDs, metafields));
433 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
434 }
435
436 protected Element processClassifierBrowseMetadataRetrieve(Element request) {
437 String[] classIDs = parse(request, GSXML.CLASS_NODE_ELEM, GSXML.NODE_ID_ATT);
438 //String[] relLinks = parse(request, GSXML.CLASS_NODE_ELEM, "externalURL");
439
440 if(classIDs == null) {
441 logger.error("ClassifierBrowseMetadataRetrieve request specified no classifier nodes.\n");
442 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
443 } else {
444 for(int i = 0; i < classIDs.length; i++) {
445 classIDs[i] = translateId(classIDs[i]);
446 }
447 }
448
449 NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM);
450 String[] metafields = {};
451 if(params.getLength() > 0) {
452 metafields = new String[params.getLength()];
453 for(int i = 0; i < metafields.length; i++) {
454 Element param = (Element)params.item(i);
455 if(param.hasAttribute(GSXML.VALUE_ATT)){
456 metafields[i] = param.getAttribute(GSXML.VALUE_ATT);
457 } else {
458 metafields[i] = "";
459 }
460 }
461 }
462
463 String lang = request.getAttribute(GSXML.LANG_ATT);
464 if(!lang.equals(prevLanguage)) {
465 prevLanguage = lang;
466 fedoraServicesAPIA.setLanguage(lang);
467 }
468 Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseMetadata(
469 this.cluster_name, "ClassifierBrowseMetadataRetrieve", classIDs, metafields));
470 //logger.error("**** Response from retrieveBrowseMeta: " + GSXML.elementToString(response, true));
471 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
472 }
473
474 protected Element processClassifierBrowse(Element request) {
475 String collection = this.cluster_name;
476 String lang = request.getAttribute(GSXML.LANG_ATT);
477 if(!lang.equals(prevLanguage)) {
478 prevLanguage = lang;
479 fedoraServicesAPIA.setLanguage(lang);
480 }
481
482 NodeList classNodes = request.getElementsByTagName(GSXML.CLASS_NODE_ELEM);
483 if(classNodes == null || classNodes.getLength() <= 0) {
484 logger.error("ClassifierBrowse request specified no classifier IDs.\n");
485 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
486 }
487 String classifierIDs[] = new String[classNodes.getLength()];
488 for(int i = 0; i < classifierIDs.length; i++) {
489 Element e = (Element)classNodes.item(i);
490 classifierIDs[i] = e.getAttribute(GSXML.NODE_ID_ATT);
491 classifierIDs[i] = translateId(classifierIDs[i]);
492 }
493
494 NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM);
495 String structure="";
496 String info="";
497 for(int i = 0; i < params.getLength(); i++) {
498 Element param = (Element)params.item(i);
499 if(param.getAttribute("name").equals("structure")) {
500 structure = structure + param.getAttribute("value") + "|";
501 } else if(param.getAttribute("name").equals("info")) {
502 info = info + param.getAttribute("value") + "|";
503 }
504 }
505 ///structure = structure + "siblings"; //test for getting with classifier browse structure: siblings
506
507 Element response
508 = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseStructure(collection, "ClassifierBrowse", classifierIDs,
509 new String[] {structure}, new String[] {info}));
510 //logger.error("**** FedoraServiceProxy - Response from retrieveBrowseStructure: " + GSXML.elementToString(response, true));
511
512 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
513 }
514
515 protected Element processTextQuery(Element request) {
516 return processQuery(request, "TextQuery");
517 }
518
519 protected Element processFieldQuery(Element request) {
520 return processQuery(request, "FieldQuery");
521 }
522
523 protected Element processQuery(Element request, String querytype) {
524 String collection = this.cluster_name;
525
526 String lang = request.getAttribute(GSXML.LANG_ATT);
527 if(!lang.equals(prevLanguage)) {
528 prevLanguage = lang;
529 fedoraServicesAPIA.setLanguage(lang);
530 }
531
532 NodeList paramNodes = request.getElementsByTagName(GSXML.PARAM_ELEM);
533 if(paramNodes.getLength() > 0) {
534 HashMap<String, String> params = new HashMap<String, String>(paramNodes.getLength());
535 for(int i = 0; i < paramNodes.getLength(); i++) {
536 Element param = (Element)paramNodes.item(i);
537 params.put(param.getAttribute(GSXML.NAME_ATT), param.getAttribute(GSXML.VALUE_ATT));
538 }
539
540 Element response = getResponseAsDOM(fedoraServicesAPIA.query(collection, querytype, params));
541 return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0);
542 } else {
543 logger.error("TextQuery request specified no parameters.\n");
544 return XMLConverter.newDOM().createElement(GSXML.RESPONSE_ELEM); // empty response
545 }
546 }
547
548 // get the requested nodeIDs out of a request message
549 protected String[] parse(Element request, String nodeType, String attribute) {
550 String[] nodevalues = null;
551 int count = 0;
552
553 Element docList = (Element) GSXML.getChildByTagName(request, nodeType+GSXML.LIST_MODIFIER);
554 if (docList != null) {
555 NodeList docNodes = docList.getElementsByTagName(nodeType);
556 if(docNodes.getLength() > 0) {
557 nodevalues = new String[docNodes.getLength()];
558 for(int i = 0; i < nodevalues.length; i++) {
559 Element e = (Element)docNodes.item(i);
560 String id = e.getAttribute(attribute);
561 // Not sure why there are at times requests for hashXXX.dir, which is not a fedora PID
562 // To skip these: if not requesting an externalURL and if requesting a docNode,
563 // then the ID has to contain the : character special to fedora PIDs
564 if(attribute == "externalURL" || (nodeType != GSXML.DOC_NODE_ELEM || id.contains(":"))) {
565 nodevalues[count++] = id;
566 }
567 }
568 }
569 }
570
571 if(count == 0) {
572 return null;
573 }
574
575 String[] tmp = new String[count];
576 for(int i = 0; i < count; i++) {
577 tmp[i] = nodevalues[i];
578 }
579 nodevalues = null;
580 nodevalues = tmp;
581
582 return nodevalues;
583 }
584
585
586 /** if id ends in .fc, .pc etc, then translate it to the correct id
587 * For now (for testing things work) the default implementation is to just remove the suffix */
588 protected String translateId(String id) {
589 if (OID.needsTranslating(id)) {
590 return OID.translateOID(this, id); //return translateOID(id);
591 }
592 return id;
593 }
594
595 /** if an id is not a greenstone id (an external id) then translate
596 * it to a greenstone one
597 * default implementation: return the id. Custom implementation:
598 * the id is a url that maps to a fedorapid whose dc.title contains the required HASHID */
599 protected String translateExternalId(String id) {
600 //logger.error("*** to translate an external ID: " + id); /////return id;
601 return this.externalId2OID(id);
602 }
603
604 /** converts an external id to greenstone OID. External ID is a URL link
605 * that, if relative, maps to a fedorapid that has an entry in fedora.
606 * The dc:title meta for that fedorapid will contain the required OID. */
607 public String externalId2OID(String extid) {
608 if(extid.endsWith(".rt") && (extid.indexOf('.') != extid.lastIndexOf('.'))) {
609 // .rt is not file extension, but Greenstone request for root of document
610 // not relevant for external ID
611 extid = extid.substring(0, extid.length()-3);
612 }
613
614 // the following method is unique to FedoraServicesAPIA
615 String response = ((FedoraServicesAPIA)fedoraServicesAPIA).getDocIDforURL(extid, this.cluster_name);
616 if(response.indexOf(GSXML.ERROR_ELEM) != -1) {
617 logger.error("**** The following error occurred when trying to find externalID for ID " + extid);
618 logger.error(response);
619 return extid;
620 }
621 if(response.equals("")) {
622 return extid;
623 } else {
624 return response;
625 }
626 }
627
628
629 /** translates relative oids into proper oids:
630 * .pr (parent), .rt (root) .fc (first child), .lc (last child),
631 * .ns (next sibling), .ps (previous sibling)
632 * .np (next page), .pp (previous page) : links sections in the order that you'd read the document
633 * a suffix is expected to be present so test before using
634 */
635 public String processOID(String doc_id, String top, String suff, int sibling_num) {
636
637 // send off request to get sibling etc. information from Fedora
638 Element response = null;
639 String[] children = null;
640 if(doc_id.startsWith("CL")) { // classifiernode
641 response = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseStructure(this.cluster_name, "ClassifierBrowse", new String[]{doc_id},
642 new String[]{"children"}, new String[]{"siblingPosition"}));
643 NodeList nl = response.getElementsByTagName(GSXML.NODE_STRUCTURE_ELEM);
644 if(nl.getLength() > 0) {
645 Element nodeStructure = (Element)nl.item(0);
646
647 if(nodeStructure != null) {
648 Element root = (Element) GSXML.getChildByTagName(nodeStructure, GSXML.CLASS_NODE_ELEM);
649 if(root != null) { // get children
650 NodeList classNodes = root.getElementsByTagName(GSXML.CLASS_NODE_ELEM);
651 if(classNodes != null) {
652 children = new String[classNodes.getLength()];
653 for(int i = 0; i < children.length; i++) {
654 Element child = (Element)classNodes.item(i);
655 children[i] = child.getAttribute(GSXML.NODE_ID_ATT);
656 }
657 }
658 }
659 }
660 }
661 } else { // documentnode
662 response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentStructure(this.cluster_name, new String[]{doc_id},
663 new String[]{"children"}, new String[]{"siblingPosition"}));
664 String path = GSPath.createPath(new String[]{GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER,
665 GSXML.DOC_NODE_ELEM, GSXML.NODE_STRUCTURE_ELEM, GSXML.DOC_NODE_ELEM});
666 Element parentDocNode = (Element) GSXML.getNodeByPath(response, path);
667
668 if (parentDocNode == null) {
669 return top;
670 } // else
671 NodeList docNodes = parentDocNode.getElementsByTagName(GSXML.DOC_NODE_ELEM); // only children should remain, since that's what we requested
672 if(docNodes.getLength() > 0) {
673 children = new String[docNodes.getLength()];
674
675 for(int i = 0; i < children.length; i++) {
676 Element e = (Element)docNodes.item(i);
677 children[i] = e.getAttribute(GSXML.NODE_ID_ATT);
678 }
679 } else { // return root node
680 children = new String[]{doc_id};
681 }
682 }
683
684 if (suff.equals("fc")) {
685 return children[0];
686 } else if (suff.equals("lc")) {
687 return children[children.length-1];
688 } else {
689 if (suff.equals("ss")) {
690 return children[sibling_num-1];
691 }
692 // find the position that we are at.
693 int i=0;
694 while(i<children.length) {
695 if (children[i].equals(top)) {
696 break;
697 }
698 i++;
699 }
700
701 if (suff.equals("ns")) {
702 if (i==children.length-1) {
703 return children[i];
704 }
705 return children[i+1];
706 } else if (suff.equals("ps")) {
707 if (i==0) {
708 return children[i];
709 }
710 return children[i-1];
711 }
712 }
713
714 return top;
715 }
716
717
718 protected Element getResponseAsDOM(String response) {
719 if(response == null) { // will not be the case, because an empty
720 return null; // response message will be sent instead
721 }
722
723 Element message = null;
724 try{
725 // turn the String xml response into a DOM tree:
726 DocumentBuilder builder
727 = DocumentBuilderFactory.newInstance().newDocumentBuilder();
728 Document doc
729 = builder.parse(new InputSource(new StringReader(response)));
730 message = doc.getDocumentElement();
731 } catch(Exception e){
732 if(response == null) {
733 response = "";
734 }
735 logger.error("An error occurred while trying to parse the response: ");
736 logger.error(response);
737 logger.error(e.getMessage());
738 }
739
740 // Error elements in message will be processed outside of here, just return the message
741 return message;
742 }
743
744
745 /** returns a specific service description */
746 protected Element getServiceDescription(Document doc, String service, String lang, String subset) {
747 if(!lang.equals(prevLanguage)) {
748 prevLanguage = lang;
749 fedoraServicesAPIA.setLanguage(lang);
750 }
751 String serviceResponse = fedoraServicesAPIA.describeService(service);
752 Element response = getResponseAsDOM(serviceResponse);
753
754 // should be no chance of an npe, since FedoraGS3 lists the services, so will have descriptions for each
755 Element e = (Element)response.getElementsByTagName(GSXML.SERVICE_ELEM).item(0);
756 e = (Element)doc.importNode(e, true);
757 return e;
758 }
759
760
761
762 public static class BasicTextMacroResolver extends MacroResolver {
763 private static final Pattern p_back_slash = Pattern.compile("\\\"");// create a pattern "\\\"", but it matches both " and \"
764
765 public String resolve(String text, String lang, String scope, String doc_oid)
766 {
767
768 if (text == null || text.equals("")) {
769 return text;
770 }
771 if (!scope.equals(SCOPE_TEXT) || text_macros.size()==0) {
772 return text;
773 }
774
775 java.util.ArrayList macros = text_macros;
776 for (int i=0; i<macros.size(); i++) {
777 String new_text = null;
778 Macro m = (Macro)macros.get(i);
779
780 if(m.type == TYPE_TEXT) {
781 // make sure we resolve any macros in the text
782
783 if(text.contains(m.macro)) {
784 if (m.resolve) {
785 new_text = this.resolve(m.text, lang, scope, doc_oid);
786 } else {
787 new_text = m.text;
788 }
789 text = StringUtils.replace(text, m.macro, new_text);//text = text.replaceAll(m.macro, new_text);
790 if (m.macro.endsWith("\\\\")) { // to get rid of "\" from the string like: "src="http://www.greenstone.org:80/.../mw.gif\">"
791 Matcher m_slash = p_back_slash.matcher(text);
792 String clean_str = "";
793 int s=0;
794 while (m_slash.find()) {
795 if (!text.substring(m_slash.end()-2, m_slash.end()-1).equals("\\")) {
796 clean_str = clean_str + text.substring(s,m_slash.end()-1); // it matches ", so get a substring before "
797 }else{
798 clean_str = clean_str + text.substring(s,m_slash.end()-2);// it matches \", so get a substring before \
799 }
800 s = m_slash.end();// get the index of the last match
801 clean_str = clean_str + "\"";
802 }
803 text = clean_str + text.substring(s,text.length());
804 }
805 }
806 }
807 }
808 return text;
809 }
810 }
811
812
813}
814
Note: See TracBrowser for help on using the repository browser.