source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24361

Last change on this file since 24361 was 24361, checked in by ak19, 13 years ago

Log messages that were written out as errors but which are actually informative debug statements are now written out as debug statements

  • Property svn:keywords set to Author Date Id Revision
File size: 25.2 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35import java.util.*;
36
37import javax.xml.parsers.DocumentBuilder;
38import javax.xml.parsers.DocumentBuilderFactory;
39
40import org.xml.sax.*;
41import javax.xml.parsers.SAXParserFactory;
42import javax.xml.parsers.ParserConfigurationException;
43import javax.xml.parsers.SAXParser;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51 * Represents a collection in Greenstone. A collection is an extension of
52 * a ServiceCluster - it has local data that the services use.
53 *
54 * @author <a href="mailto:[email protected]">Katherine Don</a>
55 * @see ModuleInterface
56 */
57public class Collection
58 extends ServiceCluster {
59
60 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
61
62 /** is this collection being tidied */
63 protected boolean useBook = false;
64 /** is this collection public or private */
65 protected boolean is_public = true;
66
67 /** does this collection provide the OAI service */
68 protected boolean has_oai = true;
69 /** time when this collection was built */
70 protected long lastmodified = 0;
71 /** earliestDatestamp of this collection. Necessary for OAI */
72 protected long earliestDatestamp = 0;
73
74
75 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
76 * the OAIPMH serviceRack
77 */
78 //protected Element service_rack_list = null;
79
80 protected XMLTransformer transformer = null;
81 /** same as setClusterName */
82 public void setCollectionName(String name) {
83 setClusterName(name);
84 }
85
86 public Collection() {
87 super();
88 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
89
90 }
91
92 /**
93 * Configures the collection.
94 *
95 * gsdlHome and collectionName must be set before configure is called.
96 *
97 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
98 * collection metadata is obtained, and services loaded.
99 *
100 * @return true/false on success/fail
101 */
102 public boolean configure() {
103
104 if (this.site_home == null || this.cluster_name== null) {
105 logger.error("Collection: site_home and collection_name must be set before configure called!");
106 return false;
107 }
108
109 Element coll_config_xml = loadCollConfigFile();
110 Element build_config_xml = loadBuildConfigFile();
111
112 if (coll_config_xml==null||build_config_xml==null) {
113 return false;
114 }
115
116 // get the collection type attribute
117 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
118 if(search!=null) {
119 col_type = search.getAttribute(GSXML.TYPE_ATT);
120 }
121
122 // process the metadata and display items
123 findAndLoadInfo(coll_config_xml, build_config_xml);
124
125 // now do the services
126 configureServiceRacks(coll_config_xml, build_config_xml);
127
128 return true;
129
130 }
131
132 public boolean useBook() {
133 return useBook;
134 }
135
136 public boolean isPublic() {
137 return is_public;
138 }
139 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
140 // amongst all oai collections in the repository. May be useful generally.
141 public long getLastmodified() {
142 return lastmodified;
143 }
144 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
145 public long getEarliestDatestamp() {
146 return earliestDatestamp;
147 }
148
149 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
150 * 11/06/2007 xiao
151 */
152 public boolean hasOAI() {
153 return has_oai;
154 }
155 /**
156 * load in the collection config file into a DOM Element
157 */
158 protected Element loadCollConfigFile() {
159
160 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
161
162 if (!coll_config_file.exists()) {
163 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
164 return null;
165 }
166 // get the xml for both files
167 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
168 Element coll_config_elem = null;
169 if (coll_config_doc != null) {
170 coll_config_elem = coll_config_doc.getDocumentElement();
171 }
172 return coll_config_elem;
173
174 }
175
176 /**
177 * load in the collection build config file into a DOM Element
178 */
179 protected Element loadBuildConfigFile() {
180
181 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
182 if (!build_config_file.exists()) {
183 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
184 return null;
185 }
186 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
187 Element build_config_elem = null;
188 if (build_config_doc != null) {
189 build_config_elem = build_config_doc.getDocumentElement();
190 }
191
192 lastmodified = build_config_file.lastModified();
193
194 return build_config_elem;
195 }
196
197 /**
198 * find the metadata and display elems from the two config files and add it to the appropriate lists
199 */
200 protected boolean findAndLoadInfo(Element coll_config_xml,
201 Element build_config_xml){
202
203 // metadata
204 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
205 addMetadata(meta_list);
206 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
207 addMetadata(meta_list);
208
209 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
210 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
211 addMetadata(meta_list);
212
213 // display stuff
214 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
215 if (display_list != null) {
216 resolveMacros(display_list);
217 addDisplayItems(display_list);
218 }
219
220 //check whether the html are tidy or not
221 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
222 if (import_list != null) {
223 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
224 addPlugins(plugin_list);
225 if (plugin_list != null){
226 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
227 if (plugin_elem != null) {
228 //get the option
229 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
230 if (option_elem != null) {
231 useBook = true;
232 }
233 }
234 }
235 }
236 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
237 if (useBook == true)
238 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
239 else
240 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
241 addMetadata(meta_list);
242
243 // check whether we are public or not
244 if (meta_list != null) {
245 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
246 if (meta_elem != null) {
247 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
248 if (value.equals("false")) {
249 is_public = false;
250 }
251 }
252 }
253 return true;
254
255 }
256
257 protected boolean configureServiceRacks(Element coll_config_xml,
258 Element build_config_xml){
259 clearServices();
260 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
261 configureServiceRackList(service_list, coll_config_xml);
262
263 // collection Config may also contain manually added service racks
264 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
265 if (service_list != null) {
266 configureServiceRackList(service_list, build_config_xml);
267
268 // Check for oai
269 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
270 if (oai_service_rack == null) {
271 has_oai = false;
272 logger.info("No oai for collection: " + this.cluster_name);
273
274 } else {
275 has_oai = true;
276
277 // extract earliestDatestamp from the buildconfig.xml for OAI
278 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
279
280 if(metadata_list != null) {
281 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
282 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
283 for (int i = 0; i < children.getLength(); i++) {
284 Element metadata = (Element)children.item(i);
285 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
286 String earliestDatestampStr = GSXML.getValue(metadata);
287 if(!earliestDatestampStr.equals("")) {
288 earliestDatestamp = Long.parseLong(earliestDatestampStr);
289 }
290 break; // found a metadata element with name=earliestDatestamp in buildconfig
291 }
292 }
293 }
294
295 // If at the end of this, there is no value for earliestDatestamp, print out a warning
296 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
297
298 }
299 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
300 // explicitly set has_oai to false here, since it's initialised to true by default
301 has_oai = false;
302 }
303 return true;
304 }
305
306 protected boolean resolveMacros(Element display_list) {
307 if (display_list==null) return false;
308 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
309 if (displaynodes.getLength()>0) {
310 String http_site = this.site_http_address;
311 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
312 for(int k=0; k<displaynodes.getLength(); k++) {
313 Element d = (Element) displaynodes.item(k);
314 String text = GSXML.getNodeText(d);
315 text = StringUtils.replace(text, "_httpsite_", http_site);
316 text = StringUtils.replace(text, "_httpcollection_", http_collection);
317 GSXML.setNodeText(d, text);
318 }
319 }
320 return true;
321 }
322 /**
323 * do a configure on only part of the collection
324 */
325 protected boolean configureSubset(String subset) {
326
327 // need the coll config files
328 Element coll_config_elem = loadCollConfigFile();
329 Element build_config_elem = loadBuildConfigFile();
330 if (coll_config_elem == null||build_config_elem == null) {
331 // wont be able to do any of the requests
332 return false;
333 }
334
335 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
336 return configureServiceRacks(coll_config_elem, build_config_elem);
337 }
338
339 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
340 return findAndLoadInfo(coll_config_elem, build_config_elem);
341
342 }
343
344 logger.error("Collection: cant process system request, configure "+subset);
345 return false;
346 }
347
348 /** handles requests made to the ServiceCluster itself
349 *
350 * @param req - the request Element- <request>
351 * @return the result Element - should be <response>
352 */
353 protected Element processMessage(Element request) {
354
355 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
356 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
357 String type = request.getAttribute(GSXML.TYPE_ATT);
358 String lang = request.getAttribute(GSXML.LANG_ATT);
359 response.setAttribute(GSXML.TYPE_ATT, type);
360
361 logger.debug("Collection received a message, attempting to process");
362
363 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
364 logger.error("Received format string request");
365
366 String subaction = request.getAttribute("subaction");
367 logger.error("Subaction is " + subaction);
368
369 String service = request.getAttribute("service");
370 logger.error("Service is " + service);
371
372 String classifier = null;
373 if(service.equals("ClassifierBrowse"))
374 {
375 classifier = request.getAttribute("classifier");
376 logger.error("Classifier is " + classifier);
377 }
378
379 //logger.error("Format string: " + format_string);
380 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
381
382 // check for version file
383
384 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
385 logger.error("Directory is " + directory);
386
387 String version_filename = "";
388 if(service.equals("ClassifierBrowse"))
389 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
390 else
391 version_filename = directory + "query_format_statement_version.txt";
392
393 File version_file = new File(version_filename);
394 logger.error("Version filename is " + version_filename);
395
396
397 if(subaction.equals("update"))
398 {
399 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
400 //String format_string = GSXML.getNodeText(format_element);
401 Element format_statement = (Element) format_element.getFirstChild();
402
403
404 String version_number = "1";
405 BufferedWriter writer;
406
407 try{
408
409 if(version_file.exists())
410 {
411 // Read version
412 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
413 version_number = reader.readLine();
414 int aInt = Integer.parseInt(version_number) + 1;
415 version_number = Integer.toString(aInt);
416 reader.close();
417 }
418 else{
419 // Create
420 version_file.createNewFile();
421 writer = new BufferedWriter(new FileWriter(version_filename));
422 writer.write(version_number);
423 writer.close();
424 }
425
426 // Write version file
427 String format_statement_filename = "";
428
429 if(service.equals("ClassifierBrowse"))
430 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
431 else
432 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
433
434 logger.error("Format statement filename is " + format_statement_filename);
435
436 // Write format statement
437 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
438 writer = new BufferedWriter(new FileWriter(format_statement_filename));
439 writer.write(format_string);
440 writer.close();
441
442 // Update version number
443 writer = new BufferedWriter(new FileWriter(version_filename));
444 writer.write(version_number);
445 writer.close();
446
447 } catch (IOException e) {
448 logger.error("IO Exception "+e);
449 }
450 }
451
452 if(subaction.equals("saveDocument"))
453 {
454 int k;
455 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
456 //String format_string = GSXML.getNodeText(format_element);
457 // Get display tag
458 Element display_format = (Element) format_element.getFirstChild();
459
460 logger.error("I have received a save document request");
461 String format_string = GSXML.xmlNodeToString(display_format);
462 logger.error("Param="+format_string);
463 String collection_config = directory + "collectionConfig.xml";
464 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
465
466 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
467
468 // Get display child
469 if(GSXML.getChildByTagName(current_node, "display") == null)
470 {
471 logger.error("ERROR: does not have a display child");
472 // well then create a format tag
473 Element display_tag = config.createElement("display");
474 current_node = (Node) current_node.appendChild(display_tag);
475 //current_node = (Node) format_tag;
476 }
477
478 else{
479 current_node = GSXML.getChildByTagName(current_node, "display");
480 }
481
482 if(GSXML.getChildByTagName(current_node, "format") == null)
483 {
484 logger.error("ERROR: does not have a format child");
485 // well then create a format tag
486 Element format_tag = config.createElement("format");
487 current_node.appendChild(format_tag);
488 //current_node = (Node) format_tag;
489 }
490
491
492 current_node.replaceChild(config.importNode(display_format,true), GSXML.getChildByTagName(current_node, "format"));
493
494 logger.error(GSXML.xmlNodeToString(current_node));
495
496 logger.error("Convert config to string");
497 String new_config = this.converter.getString(config);
498
499 new_config = StringUtils.replace(new_config, "&lt;", "<");
500 new_config = StringUtils.replace(new_config, "&gt;", ">");
501 new_config = StringUtils.replace(new_config, "&quot;", "\"");
502
503 try{
504 // Write to file (not original! for now)
505 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
506 writer.write(new_config);
507 writer.close();
508 logger.error("All is happy with collection saveDocument");
509 } catch (IOException e) {
510 logger.error("IO Exception "+e);
511 }
512 }
513
514 if(subaction.equals("save"))
515 {
516 logger.error("SAVE format statement");
517
518 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
519 //String format_string = GSXML.getNodeText(format_element);
520 Element format_statement = (Element) format_element.getFirstChild();
521
522 try{
523
524 // open collectionConfig.xml and read in to w3 Document
525 String collection_config = directory + "collectionConfig.xml";
526 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
527
528 //String tag_name = "";
529 int k;
530 int index;
531 Element elem;
532 // Try importing entire tree to this.doc so we can add and remove children at ease
533 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
534 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
535 NodeList current_node_list;
536
537 logger.error("Service is "+service);
538
539 if(service.equals("ClassifierBrowse"))
540 {
541 //tag_name = "browse";
542 // if CLX then need to look in <classifier> X then <format>
543 // default is <browse><format>
544
545 logger.error("Looking for browse");
546 current_node = GSXML.getChildByTagName(current_node, "browse");
547
548 // find CLX
549 if(classifier != null)
550 {
551 logger.error("Classifier is not null");
552 logger.error("Classifier is "+classifier);
553 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
554 index = Integer.parseInt(classifier.substring(2)) - 1;
555 logger.error("classifier index is "+index);
556 // index should be given by X-1
557 current_node = current_node_list.item(index);
558 // what if classifier does not have a format tag?
559 if(GSXML.getChildByTagName(current_node, "format") == null)
560 {
561 logger.error("ERROR: valid classifier but does not have a format child");
562 // well then create a format tag
563 Element format_tag = config.createElement("format");
564 current_node.appendChild(format_tag);
565 //current_node = (Node) format_tag;
566 }
567 }
568 else{
569 logger.error("Classifier is null");
570 // To support all classifiers, set classifier to null? There is the chance here that the format tag does not exist
571 if(GSXML.getChildByTagName(current_node, "format") == null)
572 {
573 logger.error("ERROR: classifier does not have a format child");
574 // well then create a format tag
575 Element format_tag = config.createElement("format");
576 current_node.appendChild(format_tag);
577 //current_node = (Node) format_tag;
578 }
579 }
580 }
581 else if(service.equals("AllClassifierBrowse"))
582 {
583 logger.error("Looking for browse");
584 current_node = GSXML.getChildByTagName(current_node, "browse");
585 if(GSXML.getChildByTagName(current_node, "format") == null)
586 {
587 logger.error("ERROR AllClassifierBrowse: all classifiers do not have a format child");
588 // well then create a format tag
589 Element format_tag = config.createElement("format");
590 current_node.appendChild(format_tag);
591 //current_node = (Node) format_tag;
592 }
593 }
594 else
595 {
596 // look in <format> with no attributes
597 logger.error("I presume this is search");
598
599 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
600 for(k=0; k<current_node_list.getLength(); k++)
601 {
602 current_node = current_node_list.item(k);
603 // if current_node has no attributes then break
604 elem = (Element) current_node;
605 if(elem.hasAttribute("name")==false)
606 break;
607 }
608 }
609
610 current_node.replaceChild(config.importNode(format_statement,true), GSXML.getChildByTagName(current_node, "format"));
611
612 // Now convert config document to string for writing to file
613 logger.error("Convert config to string");
614 String new_config = this.converter.getString(config);
615
616 new_config = StringUtils.replace(new_config, "&lt;", "<");
617 new_config = StringUtils.replace(new_config, "&gt;", ">");
618 new_config = StringUtils.replace(new_config, "&quot;", "\"");
619
620 // Write to file (not original! for now)
621 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
622 writer.write(new_config);
623 writer.close();
624 logger.error("All is happy with collection");
625
626 } catch( Exception ex ) {
627 logger.error("There was an exception "+ex);
628
629 StringWriter sw = new StringWriter();
630 PrintWriter pw = new PrintWriter(sw, true);
631 ex.printStackTrace(pw);
632 pw.flush();
633 sw.flush();
634 logger.error(sw.toString());
635 }
636
637 }
638 }
639 else { // unknown type
640 return super.processMessage(request);
641
642 }
643 return response;
644 }
645
646}
647
648
649
650
Note: See TracBrowser for help on using the repository browser.