source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 23901

Last change on this file since 23901 was 23901, checked in by ak19, 13 years ago

Getting GS3's OAIserver to work again. New GS3 collections are OAI-enabled by default, since they automatically get the OAIPMH ServiceRack element added into their collectionConfig.xml (from gs2build's modelcol of collectionConfig.xml). Small adjustments to the code ensure that collections that are not OAI-enabled do not appear in the list of cross-collection Identifiers and Records, and that whenever such collections are present, the Identifiers and Records can still be collected for the OAI-enabled collections without the ListIdentifiers and ListRecords requests failing.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.DocumentBuilderFactory;
38
39import org.xml.sax.*;
40import javax.xml.parsers.SAXParserFactory;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Represents a collection in Greenstone. A collection is an extension of
51 * a ServiceCluster - it has local data that the services use.
52 *
53 * @author <a href="mailto:[email protected]">Katherine Don</a>
54 * @see ModuleInterface
55 */
56public class Collection
57 extends ServiceCluster {
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
60
61 /** is this collection being tidied */
62 protected boolean useBook = false;
63 /** is this collection public or private */
64 protected boolean is_public = true;
65
66 /** does this collection provide the OAI service */
67 protected boolean has_oai = true;
68 /** time when this collection was built */
69 protected long lastmodified = 0;
70
71 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
72 * the OAIPMH serviceRack
73 */
74 //protected Element service_rack_list = null;
75
76 protected XMLTransformer transformer = null;
77 /** same as setClusterName */
78 public void setCollectionName(String name) {
79 setClusterName(name);
80 }
81
82 public Collection() {
83 super();
84 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
85
86 }
87
88 /**
89 * Configures the collection.
90 *
91 * gsdlHome and collectionName must be set before configure is called.
92 *
93 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
94 * collection metadata is obtained, and services loaded.
95 *
96 * @return true/false on success/fail
97 */
98 public boolean configure() {
99
100 if (this.site_home == null || this.cluster_name== null) {
101 logger.error("Collection: site_home and collection_name must be set before configure called!");
102 return false;
103 }
104
105 Element coll_config_xml = loadCollConfigFile();
106 Element build_config_xml = loadBuildConfigFile();
107
108 if (coll_config_xml==null||build_config_xml==null) {
109 return false;
110 }
111
112 // get the collection type attribute
113 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
114 if(search!=null) {
115 col_type = search.getAttribute(GSXML.TYPE_ATT);
116 }
117
118 // process the metadata and display items
119 findAndLoadInfo(coll_config_xml, build_config_xml);
120
121 // now do the services
122 configureServiceRacks(coll_config_xml, build_config_xml);
123
124 return true;
125
126 }
127
128 public boolean useBook() {
129 return useBook;
130 }
131
132 public boolean isPublic() {
133 return is_public;
134 }
135 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
136 public long getLastmodified() {
137 return lastmodified;
138 }
139 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
140 * 11/06/2007 xiao
141 */
142 public boolean hasOAI() {
143 return has_oai;
144 }
145 /**
146 * load in the collection config file into a DOM Element
147 */
148 protected Element loadCollConfigFile() {
149
150 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
151
152 if (!coll_config_file.exists()) {
153 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
154 return null;
155 }
156 // get the xml for both files
157 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
158 Element coll_config_elem = null;
159 if (coll_config_doc != null) {
160 coll_config_elem = coll_config_doc.getDocumentElement();
161 }
162 return coll_config_elem;
163
164 }
165
166 /**
167 * load in the collection build config file into a DOM Element
168 */
169 protected Element loadBuildConfigFile() {
170
171 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
172 if (!build_config_file.exists()) {
173 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
174 return null;
175 }
176 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
177 Element build_config_elem = null;
178 if (build_config_doc != null) {
179 build_config_elem = build_config_doc.getDocumentElement();
180 }
181
182 lastmodified = build_config_file.lastModified();
183
184 return build_config_elem;
185 }
186
187 /**
188 * find the metadata and display elems from the two config files and add it to the appropriate lists
189 */
190 protected boolean findAndLoadInfo(Element coll_config_xml,
191 Element build_config_xml){
192
193 // metadata
194 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
195 addMetadata(meta_list);
196 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
197 addMetadata(meta_list);
198
199 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
200 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
201 addMetadata(meta_list);
202
203 // display stuff
204 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
205 if (display_list != null) {
206 resolveMacros(display_list);
207 addDisplayItems(display_list);
208 }
209
210 //check whether the html are tidy or not
211 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
212 if (import_list != null) {
213 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
214 addPlugins(plugin_list);
215 if (plugin_list != null){
216 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlug");
217 if (plugin_elem != null) {
218 //get the option
219 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-tidy_html");
220 if (option_elem != null) {
221 useBook = true;
222 }
223 }
224 }
225 }
226 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
227 if (useBook == true)
228 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
229 else
230 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
231 addMetadata(meta_list);
232
233 // check whether we are public or not
234 if (meta_list != null) {
235 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
236 if (meta_elem != null) {
237 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
238 if (value.equals("false")) {
239 is_public = false;
240 }
241 }
242 }
243 return true;
244
245 }
246
247 protected boolean configureServiceRacks(Element coll_config_xml,
248 Element build_config_xml){
249 clearServices();
250 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
251 configureServiceRackList(service_list, coll_config_xml);
252
253 // collection Config may also contain manually added service racks
254 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
255 if (service_list != null) {
256 configureServiceRackList(service_list, build_config_xml);
257
258 // Check for oai
259 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
260 if (oai_service_rack == null) {
261 has_oai = false;
262 logger.info("No oai for collection: " + this.cluster_name);
263
264 } else {
265 has_oai = true;
266 }
267 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
268 // explicitly set has_oai to false here, since it's initialised to true by default
269 has_oai = false;
270 }
271 return true;
272 }
273
274 protected boolean resolveMacros(Element display_list) {
275 if (display_list==null) return false;
276 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
277 if (displaynodes.getLength()>0) {
278 String http_site = this.site_http_address;
279 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
280 for(int k=0; k<displaynodes.getLength(); k++) {
281 Element d = (Element) displaynodes.item(k);
282 String text = GSXML.getNodeText(d);
283 text = StringUtils.replace(text, "_httpsite_", http_site);
284 text = StringUtils.replace(text, "_httpcollection_", http_collection);
285 GSXML.setNodeText(d, text);
286 }
287 }
288 return true;
289 }
290 /**
291 * do a configure on only part of the collection
292 */
293 protected boolean configureSubset(String subset) {
294
295 // need the coll config files
296 Element coll_config_elem = loadCollConfigFile();
297 Element build_config_elem = loadBuildConfigFile();
298 if (coll_config_elem == null||build_config_elem == null) {
299 // wont be able to do any of the requests
300 return false;
301 }
302
303 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
304 return configureServiceRacks(coll_config_elem, build_config_elem);
305 }
306
307 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
308 return findAndLoadInfo(coll_config_elem, build_config_elem);
309
310 }
311
312 logger.error("Collection: cant process system request, configure "+subset);
313 return false;
314 }
315
316 /** handles requests made to the ServiceCluster itself
317 *
318 * @param req - the request Element- <request>
319 * @return the result Element - should be <response>
320 */
321 protected Element processMessage(Element request) {
322
323 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
324 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
325 String type = request.getAttribute(GSXML.TYPE_ATT);
326 String lang = request.getAttribute(GSXML.LANG_ATT);
327 response.setAttribute(GSXML.TYPE_ATT, type);
328
329 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
330 logger.error("Received format string request");
331
332 String subaction = request.getAttribute("subaction");
333 logger.error("Subaction is " + subaction);
334
335 String service = request.getAttribute("service");
336 logger.error("Service is " + service);
337
338 String classifier = null;
339 if(service.equals("ClassifierBrowse"))
340 {
341 classifier = request.getAttribute("classifier");
342 logger.error("Classifier is " + classifier);
343 }
344
345 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
346 //String format_string = GSXML.getNodeText(format_element);
347 Element format_statement = (Element) format_element.getFirstChild();
348
349 //logger.error("Format string: " + format_string);
350 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
351
352 // check for version file
353
354 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
355 logger.error("Directory is " + directory);
356
357 String version_filename = "";
358 if(service.equals("ClassifierBrowse"))
359 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
360 else
361 version_filename = directory + "query_format_statement_version.txt";
362
363 File version_file = new File(version_filename);
364 logger.error("Version filename is " + version_filename);
365
366
367 if(subaction.equals("update"))
368 {
369 String version_number = "1";
370 BufferedWriter writer;
371
372 try{
373
374 if(version_file.exists())
375 {
376 // Read version
377 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
378 version_number = reader.readLine();
379 int aInt = Integer.parseInt(version_number) + 1;
380 version_number = Integer.toString(aInt);
381 reader.close();
382 }
383 else{
384 // Create
385 version_file.createNewFile();
386 writer = new BufferedWriter(new FileWriter(version_filename));
387 writer.write(version_number);
388 writer.close();
389 }
390
391 // Write version file
392 String format_statement_filename = "";
393
394 if(service.equals("ClassifierBrowse"))
395 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
396 else
397 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
398
399 logger.error("Format statement filename is " + format_statement_filename);
400
401 // Write format statement
402 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
403 writer = new BufferedWriter(new FileWriter(format_statement_filename));
404 writer.write(format_string);
405 writer.close();
406
407 // Update version number
408 writer = new BufferedWriter(new FileWriter(version_filename));
409 writer.write(version_number);
410 writer.close();
411
412 } catch (IOException e) {
413 logger.error("IO Exception "+e);
414 }
415 }
416
417 if(subaction.equals("save"))
418 {
419 logger.error("SAVE format statement");
420
421 try{
422
423 // Convert format string to a document
424 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
425 //DocumentBuilder builder = factory.newDocumentBuilder();
426 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
427 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
428 //logger.error(input);
429 //InputSource is = new InputSource( new StringReader( input ) );
430 //logger.error("About to parse format string");
431 //Document format_statement = (Document) builder.parse( is );
432 //logger.error("Done parsing format string");
433
434 // open collectionConfig.xml and read in to w3 Document
435 String collection_config = directory + "collectionConfig.xml";
436 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
437
438 //String tag_name = "";
439 int k;
440 int index;
441 Element elem;
442 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
443 NodeList current_node_list;
444
445 if(service.equals("ClassifierBrowse"))
446 {
447 //tag_name = "browse";
448 // if CLX then need to look in <classifier> X then <format>
449 // default is <browse><format>
450
451 current_node = GSXML.getChildByTagName(current_node, "browse");
452
453 // find CLX
454 if(classifier != null)
455 {
456 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
457 index = Integer.parseInt(classifier.substring(2)) - 1;
458 // index should be given by X-1
459 current_node = current_node_list.item(index);
460 current_node = GSXML.getChildByTagName(current_node, "format");
461 }
462 else{
463 current_node = GSXML.getChildByTagName(current_node, "format");
464 }
465 }
466 else
467 {
468 // look in <format> with no attributes
469
470 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
471 for(k=0; k<current_node_list.getLength(); k++)
472 {
473 current_node = current_node_list.item(k);
474 // if current_node has no attributes then break
475 elem = (Element) current_node;
476 if(elem.hasAttribute("name")==false)
477 break;
478 }
479 }
480
481 // Current_node should be a format tag
482 elem = (Element) current_node;
483
484 logger.error("Current_node = " + elem.getNodeName());
485
486 // seems we want to remove current child/ren and replace with format_statement's child/ren?
487
488 // remove existing
489 current_node_list = elem.getChildNodes();
490 for(k=0; k<current_node_list.getLength(); k++)
491 {
492 current_node = elem.removeChild(current_node_list.item(k));
493 }
494
495 // append new but we have a string!
496 //GSXML.setNodeText(elem, "THIS IS A TEST");
497 //GSXML.setNodeText(elem, format_string);
498
499 current_node_list = format_statement.getChildNodes();
500 for(k=0; k<current_node_list.getLength(); k++)
501 {
502 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
503 //transformed = ((Document)transformed).getDocumentElement();
504 logger.error("Node type: "+current_node_list.item(k).getNodeType());
505 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
506 current_node = elem.appendChild(this.doc.importNode(current_node_list.item(k),true));
507 }
508
509 //String text = GSXML.getNodeText(elem);
510 //logger.error(text);
511 //text = text.replaceAll("_httpsite_", http_site);
512 //text = text.replaceAll("_httpcollection_", http_collection);
513 //GSXML.setNodeText(d, text);
514
515 // Now convert config document to string for writing to file
516 String new_config = this.converter.getString(config);
517
518 // Write to file (not original! for now)
519 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
520 writer.write(new_config);
521 writer.close();
522
523 } catch( Exception ex ) {
524 logger.error("There was an exception "+ex);
525
526 StringWriter sw = new StringWriter();
527 PrintWriter pw = new PrintWriter(sw, true);
528 ex.printStackTrace(pw);
529 pw.flush();
530 sw.flush();
531 logger.error(sw.toString());
532 }
533
534 }
535 }
536 else { // unknown type
537 return super.processMessage(request);
538
539 }
540 return response;
541 }
542
543}
544
545
546
547
Note: See TracBrowser for help on using the repository browser.