source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24222

Last change on this file since 24222 was 24222, checked in by sjm84, 13 years ago

Removed a debug statement from previous commit

  • Property svn:keywords set to Author Date Id Revision
File size: 22.2 KB
Line 
1/*
2* Collection.java
3* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4*
5* This program is free software; you can redistribute it and/or modify
6* it under the terms of the GNU General Public License as published by
7* the Free Software Foundation; either version 2 of the License, or
8* (at your option) any later version.
9*
10* This program is distributed in the hope that it will be useful,
11* but WITHOUT ANY WARRANTY; without even the implied warranty of
12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13* GNU General Public License for more details.
14*
15* You should have received a copy of the GNU General Public License
16* along with this program; if not, write to the Free Software
17* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35import java.util.*;
36
37import javax.xml.parsers.DocumentBuilder;
38import javax.xml.parsers.DocumentBuilderFactory;
39
40import org.xml.sax.*;
41import javax.xml.parsers.SAXParserFactory;
42import javax.xml.parsers.ParserConfigurationException;
43import javax.xml.parsers.SAXParser;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51* Represents a collection in Greenstone. A collection is an extension of
52* a ServiceCluster - it has local data that the services use.
53*
54* @author <a href="mailto:[email protected]">Katherine Don</a>
55* @see ModuleInterface
56*/
57public class Collection
58extends ServiceCluster {
59
60 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
61
62 /** is this collection being tidied */
63 protected boolean useBook = false;
64 /** is this collection public or private */
65 protected boolean is_public = true;
66
67 /** does this collection provide the OAI service */
68 protected boolean has_oai = true;
69 /** time when this collection was built */
70 protected long lastmodified = 0;
71 /** earliestDatestamp of this collection. Necessary for OAI */
72 protected long earliestDatestamp = 0;
73
74
75 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
76 * the OAIPMH serviceRack
77 */
78 //protected Element service_rack_list = null;
79
80 protected XMLTransformer transformer = null;
81 /** same as setClusterName */
82 public void setCollectionName(String name) {
83 setClusterName(name);
84 }
85
86 public Collection() {
87 super();
88 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
89
90 }
91
92 /**
93 * Configures the collection.
94 *
95 * gsdlHome and collectionName must be set before configure is called.
96 *
97 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
98 * collection metadata is obtained, and services loaded.
99 *
100 * @return true/false on success/fail
101 */
102 public boolean configure() {
103
104 if (this.site_home == null || this.cluster_name== null) {
105 logger.error("Collection: site_home and collection_name must be set before configure called!");
106 return false;
107 }
108
109 Element coll_config_xml = loadCollConfigFile();
110 Element build_config_xml = loadBuildConfigFile();
111
112 if (coll_config_xml==null||build_config_xml==null) {
113 return false;
114 }
115
116 // get the collection type attribute
117 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
118 if(search != null) {
119 col_type = search.getAttribute(GSXML.TYPE_ATT);
120 }
121
122 Element browse = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.INFODB_ELEM);
123 if(browse != null) {
124 db_type = browse.getAttribute(GSXML.TYPE_ATT);
125 }
126 else
127 {
128 db_type = "gdbm"; //The default
129 }
130
131 // process the metadata and display items
132 findAndLoadInfo(coll_config_xml, build_config_xml);
133
134 // now do the services
135 configureServiceRacks(coll_config_xml, build_config_xml);
136
137 return true;
138
139 }
140
141 public boolean useBook() {
142 return useBook;
143 }
144
145 public boolean isPublic() {
146 return is_public;
147 }
148 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
149 // amongst all oai collections in the repository. May be useful generally.
150 public long getLastmodified() {
151 return lastmodified;
152 }
153 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
154 public long getEarliestDatestamp() {
155 return earliestDatestamp;
156 }
157
158 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
159 * 11/06/2007 xiao
160 */
161 public boolean hasOAI() {
162 return has_oai;
163 }
164 /**
165 * load in the collection config file into a DOM Element
166 */
167 protected Element loadCollConfigFile() {
168
169 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
170
171 if (!coll_config_file.exists()) {
172 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
173 return null;
174 }
175 // get the xml for both files
176 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
177 Element coll_config_elem = null;
178 if (coll_config_doc != null) {
179 coll_config_elem = coll_config_doc.getDocumentElement();
180 }
181 return coll_config_elem;
182
183 }
184
185 /**
186 * load in the collection build config file into a DOM Element
187 */
188 protected Element loadBuildConfigFile() {
189
190 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
191 if (!build_config_file.exists()) {
192 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
193 return null;
194 }
195 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
196 Element build_config_elem = null;
197 if (build_config_doc != null) {
198 build_config_elem = build_config_doc.getDocumentElement();
199 }
200
201 lastmodified = build_config_file.lastModified();
202
203 return build_config_elem;
204 }
205
206 /**
207 * find the metadata and display elems from the two config files and add it to the appropriate lists
208 */
209 protected boolean findAndLoadInfo(Element coll_config_xml,
210 Element build_config_xml){
211
212 // metadata
213 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
214 addMetadata(meta_list);
215 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
216 addMetadata(meta_list);
217
218 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
219 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
220 addMetadata(meta_list);
221
222 // display stuff
223 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
224 if (display_list != null) {
225 resolveMacros(display_list);
226 addDisplayItems(display_list);
227 }
228
229 //check whether the html are tidy or not
230 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
231 if (import_list != null) {
232 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
233 addPlugins(plugin_list);
234 if (plugin_list != null){
235 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
236 if (plugin_elem != null) {
237 //get the option
238 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
239 if (option_elem != null) {
240 useBook = true;
241 }
242 }
243 }
244 }
245 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
246 if (useBook == true)
247 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
248 else
249 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
250 addMetadata(meta_list);
251
252 // check whether we are public or not
253 if (meta_list != null) {
254 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
255 if (meta_elem != null) {
256 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
257 if (value.equals("false")) {
258 is_public = false;
259 }
260 }
261 }
262 return true;
263
264 }
265
266 protected boolean configureServiceRacks(Element coll_config_xml,
267 Element build_config_xml){
268 clearServices();
269 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
270 configureServiceRackList(service_list, coll_config_xml);
271
272 // collection Config may also contain manually added service racks
273 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
274 if (service_list != null) {
275 configureServiceRackList(service_list, build_config_xml);
276
277 // Check for oai
278 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
279 if (oai_service_rack == null) {
280 has_oai = false;
281 logger.info("No oai for collection: " + this.cluster_name);
282
283 } else {
284 has_oai = true;
285
286 // extract earliestDatestamp from the buildconfig.xml for OAI
287 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
288
289 if(metadata_list != null) {
290 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
291 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
292 for (int i = 0; i < children.getLength(); i++) {
293 Element metadata = (Element)children.item(i);
294 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
295 String earliestDatestampStr = GSXML.getValue(metadata);
296 if(!earliestDatestampStr.equals("")) {
297 earliestDatestamp = Long.parseLong(earliestDatestampStr);
298 }
299 break; // found a metadata element with name=earliestDatestamp in buildconfig
300 }
301 }
302 }
303
304 // If at the end of this, there is no value for earliestDatestamp, print out a warning
305 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
306
307 }
308 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
309 // explicitly set has_oai to false here, since it's initialised to true by default
310 has_oai = false;
311 }
312 return true;
313 }
314
315 protected boolean resolveMacros(Element display_list) {
316 if (display_list==null) return false;
317 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
318 if (displaynodes.getLength()>0) {
319 String http_site = this.site_http_address;
320 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
321 for(int k=0; k<displaynodes.getLength(); k++) {
322 Element d = (Element) displaynodes.item(k);
323 String text = GSXML.getNodeText(d);
324 text = StringUtils.replace(text, "_httpsite_", http_site);
325 text = StringUtils.replace(text, "_httpcollection_", http_collection);
326 GSXML.setNodeText(d, text);
327 }
328 }
329 return true;
330 }
331 /**
332 * do a configure on only part of the collection
333 */
334 protected boolean configureSubset(String subset) {
335
336 // need the coll config files
337 Element coll_config_elem = loadCollConfigFile();
338 Element build_config_elem = loadBuildConfigFile();
339 if (coll_config_elem == null||build_config_elem == null) {
340 // wont be able to do any of the requests
341 return false;
342 }
343
344 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
345 return configureServiceRacks(coll_config_elem, build_config_elem);
346 }
347
348 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
349 return findAndLoadInfo(coll_config_elem, build_config_elem);
350
351 }
352
353 logger.error("Collection: cant process system request, configure "+subset);
354 return false;
355 }
356
357 /** handles requests made to the ServiceCluster itself
358 *
359 * @param req - the request Element- <request>
360 * @return the result Element - should be <response>
361 */
362 protected Element processMessage(Element request) {
363
364 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
365 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
366 String type = request.getAttribute(GSXML.TYPE_ATT);
367 String lang = request.getAttribute(GSXML.LANG_ATT);
368 response.setAttribute(GSXML.TYPE_ATT, type);
369
370 logger.error("Collection received a message, attempting to process");
371
372 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
373 logger.error("Received format string request");
374
375 String subaction = request.getAttribute("subaction");
376 logger.error("Subaction is " + subaction);
377
378 String service = request.getAttribute("service");
379 logger.error("Service is " + service);
380
381 String classifier = null;
382 if(service.equals("ClassifierBrowse"))
383 {
384 classifier = request.getAttribute("classifier");
385 logger.error("Classifier is " + classifier);
386 }
387
388 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
389 //String format_string = GSXML.getNodeText(format_element);
390 Element format_statement = (Element) format_element.getFirstChild();
391
392 //logger.error("Format string: " + format_string);
393 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
394
395 // check for version file
396
397 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
398 logger.error("Directory is " + directory);
399
400 String version_filename = "";
401 if(service.equals("ClassifierBrowse"))
402 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
403 else
404 version_filename = directory + "query_format_statement_version.txt";
405
406 File version_file = new File(version_filename);
407 logger.error("Version filename is " + version_filename);
408
409
410 if(subaction.equals("update"))
411 {
412 String version_number = "1";
413 BufferedWriter writer;
414
415 try{
416
417 if(version_file.exists())
418 {
419 // Read version
420 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
421 version_number = reader.readLine();
422 int aInt = Integer.parseInt(version_number) + 1;
423 version_number = Integer.toString(aInt);
424 reader.close();
425 }
426 else{
427 // Create
428 version_file.createNewFile();
429 writer = new BufferedWriter(new FileWriter(version_filename));
430 writer.write(version_number);
431 writer.close();
432 }
433
434 // Write version file
435 String format_statement_filename = "";
436
437 if(service.equals("ClassifierBrowse"))
438 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
439 else
440 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
441
442 logger.error("Format statement filename is " + format_statement_filename);
443
444 // Write format statement
445 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
446 writer = new BufferedWriter(new FileWriter(format_statement_filename));
447 writer.write(format_string);
448 writer.close();
449
450 // Update version number
451 writer = new BufferedWriter(new FileWriter(version_filename));
452 writer.write(version_number);
453 writer.close();
454
455 } catch (IOException e) {
456 logger.error("IO Exception "+e);
457 }
458 }
459
460 if(subaction.equals("save"))
461 {
462 logger.error("SAVE format statement");
463
464 try{
465
466 // Convert format string to a document
467 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
468 //DocumentBuilder builder = factory.newDocumentBuilder();
469 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
470 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
471 //logger.error(input);
472 //InputSource is = new InputSource( new StringReader( input ) );
473 //logger.error("About to parse format string");
474 //Document format_statement = (Document) builder.parse( is );
475 //logger.error("Done parsing format string");
476
477 // open collectionConfig.xml and read in to w3 Document
478 String collection_config = directory + "collectionConfig.xml";
479 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
480
481 //String tag_name = "";
482 int k;
483 int index;
484 Element elem;
485 // Try importing entire tree to this.doc so we can add and remove children at ease
486 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
487 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
488 NodeList current_node_list;
489
490 logger.error("Service is "+service);
491
492 if(service.equals("ClassifierBrowse"))
493 {
494 //tag_name = "browse";
495 // if CLX then need to look in <classifier> X then <format>
496 // default is <browse><format>
497
498 logger.error("Looking for browse");
499 current_node = GSXML.getChildByTagName(current_node, "browse");
500
501 // find CLX
502 if(classifier != null)
503 {
504 logger.error("Classifier is not null");
505 logger.error("Classifier is "+classifier);
506 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
507 index = Integer.parseInt(classifier.substring(2)) - 1;
508 logger.error("classifier index is "+index);
509 // index should be given by X-1
510 current_node = current_node_list.item(index);
511 // what if classifier does not have a format tag?
512 if(GSXML.getChildByTagName(current_node, "format") == null)
513 {
514 logger.error("ERROR: valid classifier but does not have a format child");
515 // well then create a format tag
516 Element format_tag = config.createElement("format");
517 current_node = (Node) current_node.appendChild(format_tag);
518 //current_node = (Node) format_tag;
519 }
520
521 else{
522 current_node = GSXML.getChildByTagName(current_node, "format");
523 }
524
525 }
526 else{
527 logger.error("Classifier is null");
528 // To support all classifiers, set classifier to null? There is the chance here that the format tag does not exist
529 if(GSXML.getChildByTagName(current_node, "format") == null)
530 {
531 logger.error("ERROR: classifier does not have a format child");
532 // well then create a format tag
533 Element format_tag = config.createElement("format");
534 current_node = (Node) current_node.appendChild(format_tag);
535 //current_node = (Node) format_tag;
536 }
537 else
538 current_node = GSXML.getChildByTagName(current_node, "format");
539 }
540 }
541 else if(service.equals("AllClassifierBrowse"))
542 {
543 logger.error("Looking for browse");
544 current_node = GSXML.getChildByTagName(current_node, "browse");
545 if(GSXML.getChildByTagName(current_node, "format") == null)
546 {
547 logger.error("ERROR AllClassifierBrowse: all classifiers do not have a format child");
548 // well then create a format tag
549 Element format_tag = config.createElement("format");
550 current_node = (Node) current_node.appendChild(format_tag);
551 //current_node = (Node) format_tag;
552 }
553 else
554 current_node = GSXML.getChildByTagName(current_node, "format");
555 }
556 else
557 {
558 // look in <format> with no attributes
559 logger.error("I presume this is search");
560
561 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
562 for(k=0; k<current_node_list.getLength(); k++)
563 {
564 current_node = current_node_list.item(k);
565 // if current_node has no attributes then break
566 elem = (Element) current_node;
567 if(elem.hasAttribute("name")==false)
568 break;
569 }
570 }
571
572 // Current_node should be a format tag
573 elem = (Element) current_node;
574
575 logger.error("*Current_node = " + elem.getNodeName());
576
577 // seems we want to remove current child/ren and replace with format_statement's child/ren?
578
579 // remove existing
580 current_node_list = elem.getChildNodes();
581 logger.error("About to remove old children");
582 for(k=0; k<current_node_list.getLength(); k++)
583 {
584 current_node = elem.removeChild(current_node_list.item(k));
585 }
586
587 logger.error("old nodes removed");
588
589 // append new but we have a string!
590 //GSXML.setNodeText(elem, "THIS IS A TEST");
591 //GSXML.setNodeText(elem, format_string);
592
593 current_node_list = format_statement.getChildNodes();
594 for(k=0; k<current_node_list.getLength(); k++)
595 {
596 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
597 //transformed = ((Document)transformed).getDocumentElement();
598 //logger.error("Node type: "+current_node_list.item(k).getNodeType());
599 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
600 elem.appendChild(config.importNode(current_node_list.item(k),true));
601 }
602 logger.error("new nodes added");
603
604 //String text = GSXML.getNodeText(elem);
605 //logger.error(text);
606 //text = text.replaceAll("_httpsite_", http_site);
607 //text = text.replaceAll("_httpcollection_", http_collection);
608 //GSXML.setNodeText(d, text);
609
610 // Now convert config document to string for writing to file
611 logger.error("Convert config to string");
612 String new_config = this.converter.getString(config);
613
614 new_config = StringUtils.replace(new_config, "&lt;", "<");
615 new_config = StringUtils.replace(new_config, "&gt;", ">");
616 new_config = StringUtils.replace(new_config, "&quot;", "\"");
617
618 // Write to file (not original! for now)
619 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
620 writer.write(new_config);
621 writer.close();
622 logger.error("All is happy with collection");
623
624 } catch( Exception ex ) {
625 logger.error("There was an exception "+ex);
626
627 StringWriter sw = new StringWriter();
628 PrintWriter pw = new PrintWriter(sw, true);
629 ex.printStackTrace(pw);
630 pw.flush();
631 sw.flush();
632 logger.error(sw.toString());
633 }
634
635 }
636 }
637 else { // unknown type
638 return super.processMessage(request);
639
640 }
641 return response;
642 }
643
644}
645
646
647
648
Note: See TracBrowser for help on using the repository browser.