source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24236

Last change on this file since 24236 was 24236, checked in by sjb48, 13 years ago

Collection now supports the saving of document format edit features

  • Property svn:keywords set to Author Date Id Revision
File size: 25.2 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35import java.util.*;
36
37import javax.xml.parsers.DocumentBuilder;
38import javax.xml.parsers.DocumentBuilderFactory;
39
40import org.xml.sax.*;
41import javax.xml.parsers.SAXParserFactory;
42import javax.xml.parsers.ParserConfigurationException;
43import javax.xml.parsers.SAXParser;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51 * Represents a collection in Greenstone. A collection is an extension of
52 * a ServiceCluster - it has local data that the services use.
53 *
54 * @author <a href="mailto:[email protected]">Katherine Don</a>
55 * @see ModuleInterface
56 */
57public class Collection
58 extends ServiceCluster {
59
60 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
61
62 /** is this collection being tidied */
63 protected boolean useBook = false;
64 /** is this collection public or private */
65 protected boolean is_public = true;
66
67 /** does this collection provide the OAI service */
68 protected boolean has_oai = true;
69 /** time when this collection was built */
70 protected long lastmodified = 0;
71 /** earliestDatestamp of this collection. Necessary for OAI */
72 protected long earliestDatestamp = 0;
73
74
75 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
76 * the OAIPMH serviceRack
77 */
78 //protected Element service_rack_list = null;
79
80 protected XMLTransformer transformer = null;
81 /** same as setClusterName */
82 public void setCollectionName(String name) {
83 setClusterName(name);
84 }
85
/**
 * Creates an unconfigured collection whose description is a freshly
 * created collection element.
 */
public Collection() {
    // the superclass constructor runs implicitly before this body
    Element collection_description = this.doc.createElement(GSXML.COLLECTION_ELEM);
    this.description = collection_description;
}
91
92 /**
93 * Configures the collection.
94 *
95 * gsdlHome and collectionName must be set before configure is called.
96 *
97 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
98 * collection metadata is obtained, and services loaded.
99 *
100 * @return true/false on success/fail
101 */
102 public boolean configure() {
103
104 if (this.site_home == null || this.cluster_name== null) {
105 logger.error("Collection: site_home and collection_name must be set before configure called!");
106 return false;
107 }
108
109 Element coll_config_xml = loadCollConfigFile();
110 Element build_config_xml = loadBuildConfigFile();
111
112 if (coll_config_xml==null||build_config_xml==null) {
113 return false;
114 }
115
116 // get the collection type attribute
117 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
118 if(search!=null) {
119 col_type = search.getAttribute(GSXML.TYPE_ATT);
120 }
121
122 // process the metadata and display items
123 findAndLoadInfo(coll_config_xml, build_config_xml);
124
125 // now do the services
126 configureServiceRacks(coll_config_xml, build_config_xml);
127
128 return true;
129
130 }
131
132 public boolean useBook() {
133 return useBook;
134 }
135
136 public boolean isPublic() {
137 return is_public;
138 }
139 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
140 // amongst all oai collections in the repository. May be useful generally.
141 public long getLastmodified() {
142 return lastmodified;
143 }
144 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
145 public long getEarliestDatestamp() {
146 return earliestDatestamp;
147 }
148
149 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
150 * 11/06/2007 xiao
151 */
152 public boolean hasOAI() {
153 return has_oai;
154 }
155 /**
156 * load in the collection config file into a DOM Element
157 */
158 protected Element loadCollConfigFile() {
159
160 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
161
162 if (!coll_config_file.exists()) {
163 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
164 return null;
165 }
166 // get the xml for both files
167 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
168 Element coll_config_elem = null;
169 if (coll_config_doc != null) {
170 coll_config_elem = coll_config_doc.getDocumentElement();
171 }
172 return coll_config_elem;
173
174 }
175
176 /**
177 * load in the collection build config file into a DOM Element
178 */
179 protected Element loadBuildConfigFile() {
180
181 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
182 if (!build_config_file.exists()) {
183 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
184 return null;
185 }
186 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
187 Element build_config_elem = null;
188 if (build_config_doc != null) {
189 build_config_elem = build_config_doc.getDocumentElement();
190 }
191
192 lastmodified = build_config_file.lastModified();
193
194 return build_config_elem;
195 }
196
197 /**
198 * find the metadata and display elems from the two config files and add it to the appropriate lists
199 */
200 protected boolean findAndLoadInfo(Element coll_config_xml,
201 Element build_config_xml){
202
203 // metadata
204 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
205 addMetadata(meta_list);
206 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
207 addMetadata(meta_list);
208
209 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
210 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
211 addMetadata(meta_list);
212
213 // display stuff
214 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
215 if (display_list != null) {
216 resolveMacros(display_list);
217 addDisplayItems(display_list);
218 }
219
220 //check whether the html are tidy or not
221 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
222 if (import_list != null) {
223 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
224 addPlugins(plugin_list);
225 if (plugin_list != null){
226 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
227 if (plugin_elem != null) {
228 //get the option
229 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
230 if (option_elem != null) {
231 useBook = true;
232 }
233 }
234 }
235 }
236 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
237 if (useBook == true)
238 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
239 else
240 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
241 addMetadata(meta_list);
242
243 // check whether we are public or not
244 if (meta_list != null) {
245 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
246 if (meta_elem != null) {
247 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
248 if (value.equals("false")) {
249 is_public = false;
250 }
251 }
252 }
253 return true;
254
255 }
256
257 protected boolean configureServiceRacks(Element coll_config_xml,
258 Element build_config_xml){
259 clearServices();
260 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
261 configureServiceRackList(service_list, coll_config_xml);
262
263 // collection Config may also contain manually added service racks
264 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
265 if (service_list != null) {
266 configureServiceRackList(service_list, build_config_xml);
267
268 // Check for oai
269 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
270 if (oai_service_rack == null) {
271 has_oai = false;
272 logger.info("No oai for collection: " + this.cluster_name);
273
274 } else {
275 has_oai = true;
276
277 // extract earliestDatestamp from the buildconfig.xml for OAI
278 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
279
280 if(metadata_list != null) {
281 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
282 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
283 for (int i = 0; i < children.getLength(); i++) {
284 Element metadata = (Element)children.item(i);
285 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
286 String earliestDatestampStr = GSXML.getValue(metadata);
287 if(!earliestDatestampStr.equals("")) {
288 earliestDatestamp = Long.parseLong(earliestDatestampStr);
289 }
290 break; // found a metadata element with name=earliestDatestamp in buildconfig
291 }
292 }
293 }
294
295 // If at the end of this, there is no value for earliestDatestamp, print out a warning
296 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
297
298 }
299 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
300 // explicitly set has_oai to false here, since it's initialised to true by default
301 has_oai = false;
302 }
303 return true;
304 }
305
306 protected boolean resolveMacros(Element display_list) {
307 if (display_list==null) return false;
308 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
309 if (displaynodes.getLength()>0) {
310 String http_site = this.site_http_address;
311 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
312 for(int k=0; k<displaynodes.getLength(); k++) {
313 Element d = (Element) displaynodes.item(k);
314 String text = GSXML.getNodeText(d);
315 text = StringUtils.replace(text, "_httpsite_", http_site);
316 text = StringUtils.replace(text, "_httpcollection_", http_collection);
317 GSXML.setNodeText(d, text);
318 }
319 }
320 return true;
321 }
322 /**
323 * do a configure on only part of the collection
324 */
325 protected boolean configureSubset(String subset) {
326
327 // need the coll config files
328 Element coll_config_elem = loadCollConfigFile();
329 Element build_config_elem = loadBuildConfigFile();
330 if (coll_config_elem == null||build_config_elem == null) {
331 // wont be able to do any of the requests
332 return false;
333 }
334
335 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
336 return configureServiceRacks(coll_config_elem, build_config_elem);
337 }
338
339 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
340 return findAndLoadInfo(coll_config_elem, build_config_elem);
341
342 }
343
344 logger.error("Collection: cant process system request, configure "+subset);
345 return false;
346 }
347
348 /** handles requests made to the ServiceCluster itself
349 *
350 * @param req - the request Element- <request>
351 * @return the result Element - should be <response>
352 */
353 protected Element processMessage(Element request) {
354
355 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
356 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
357 String type = request.getAttribute(GSXML.TYPE_ATT);
358 String lang = request.getAttribute(GSXML.LANG_ATT);
359 response.setAttribute(GSXML.TYPE_ATT, type);
360
361 logger.error("Collection received a message, attempting to process");
362
363 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
364 logger.error("Received format string request");
365
366 String subaction = request.getAttribute("subaction");
367 logger.error("Subaction is " + subaction);
368
369 String service = request.getAttribute("service");
370 logger.error("Service is " + service);
371
372 String classifier = null;
373 if(service.equals("ClassifierBrowse"))
374 {
375 classifier = request.getAttribute("classifier");
376 logger.error("Classifier is " + classifier);
377 }
378
379 //logger.error("Format string: " + format_string);
380 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
381
382 // check for version file
383
384 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
385 logger.error("Directory is " + directory);
386
387 String version_filename = "";
388 if(service.equals("ClassifierBrowse"))
389 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
390 else
391 version_filename = directory + "query_format_statement_version.txt";
392
393 File version_file = new File(version_filename);
394 logger.error("Version filename is " + version_filename);
395
396
397 if(subaction.equals("update"))
398 {
399 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
400 //String format_string = GSXML.getNodeText(format_element);
401 Element format_statement = (Element) format_element.getFirstChild();
402
403
404 String version_number = "1";
405 BufferedWriter writer;
406
407 try{
408
409 if(version_file.exists())
410 {
411 // Read version
412 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
413 version_number = reader.readLine();
414 int aInt = Integer.parseInt(version_number) + 1;
415 version_number = Integer.toString(aInt);
416 reader.close();
417 }
418 else{
419 // Create
420 version_file.createNewFile();
421 writer = new BufferedWriter(new FileWriter(version_filename));
422 writer.write(version_number);
423 writer.close();
424 }
425
426 // Write version file
427 String format_statement_filename = "";
428
429 if(service.equals("ClassifierBrowse"))
430 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
431 else
432 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
433
434 logger.error("Format statement filename is " + format_statement_filename);
435
436 // Write format statement
437 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
438 writer = new BufferedWriter(new FileWriter(format_statement_filename));
439 writer.write(format_string);
440 writer.close();
441
442 // Update version number
443 writer = new BufferedWriter(new FileWriter(version_filename));
444 writer.write(version_number);
445 writer.close();
446
447 } catch (IOException e) {
448 logger.error("IO Exception "+e);
449 }
450 }
451
452 if(subaction.equals("saveDocument"))
453 {
454 int k;
455 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
456 //String format_string = GSXML.getNodeText(format_element);
457 // Get display tag
458 Element display_format = (Element) format_element.getFirstChild();
459
460 logger.error("I have received a save document request");
461 String format_string = GSXML.xmlNodeToString(display_format);
462 logger.error("Param="+format_string);
463 String collection_config = directory + "collectionConfig.xml";
464 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
465
466 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
467
468 // Get display child
469 if(GSXML.getChildByTagName(current_node, "display") == null)
470 {
471 logger.error("ERROR: does not have a display child");
472 // well then create a format tag
473 Element display_tag = config.createElement("display");
474 current_node = (Node) current_node.appendChild(display_tag);
475 //current_node = (Node) format_tag;
476 }
477
478 else{
479 current_node = GSXML.getChildByTagName(current_node, "display");
480 }
481
482 if(GSXML.getChildByTagName(current_node, "format") == null)
483 {
484 logger.error("ERROR: does not have a format child");
485 // well then create a format tag
486 Element format_tag = config.createElement("format");
487 current_node.appendChild(format_tag);
488 //current_node = (Node) format_tag;
489 }
490
491
492 current_node.replaceChild(config.importNode(display_format,true), GSXML.getChildByTagName(current_node, "format"));
493
494 logger.error(GSXML.xmlNodeToString(current_node));
495
496 logger.error("Convert config to string");
497 String new_config = this.converter.getString(config);
498
499 new_config = StringUtils.replace(new_config, "&lt;", "<");
500 new_config = StringUtils.replace(new_config, "&gt;", ">");
501 new_config = StringUtils.replace(new_config, "&quot;", "\"");
502
503 try{
504 // Write to file (not original! for now)
505 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
506 writer.write(new_config);
507 writer.close();
508 logger.error("All is happy with collection saveDocument");
509 } catch (IOException e) {
510 logger.error("IO Exception "+e);
511 }
512 }
513
514 if(subaction.equals("save"))
515 {
516 logger.error("SAVE format statement");
517
518 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
519 //String format_string = GSXML.getNodeText(format_element);
520 Element format_statement = (Element) format_element.getFirstChild();
521
522 try{
523
524 // open collectionConfig.xml and read in to w3 Document
525 String collection_config = directory + "collectionConfig.xml";
526 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
527
528 //String tag_name = "";
529 int k;
530 int index;
531 Element elem;
532 // Try importing entire tree to this.doc so we can add and remove children at ease
533 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
534 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
535 NodeList current_node_list;
536
537 logger.error("Service is "+service);
538
539 if(service.equals("ClassifierBrowse"))
540 {
541 //tag_name = "browse";
542 // if CLX then need to look in <classifier> X then <format>
543 // default is <browse><format>
544
545 logger.error("Looking for browse");
546 current_node = GSXML.getChildByTagName(current_node, "browse");
547
548 // find CLX
549 if(classifier != null)
550 {
551 logger.error("Classifier is not null");
552 logger.error("Classifier is "+classifier);
553 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
554 index = Integer.parseInt(classifier.substring(2)) - 1;
555 logger.error("classifier index is "+index);
556 // index should be given by X-1
557 current_node = current_node_list.item(index);
558 // what if classifier does not have a format tag?
559 if(GSXML.getChildByTagName(current_node, "format") == null)
560 {
561 logger.error("ERROR: valid classifier but does not have a format child");
562 // well then create a format tag
563 Element format_tag = config.createElement("format");
564 current_node.appendChild(format_tag);
565 //current_node = (Node) format_tag;
566 }
567 }
568 else{
569 logger.error("Classifier is null");
570 // To support all classifiers, set classifier to null? There is the chance here that the format tag does not exist
571 if(GSXML.getChildByTagName(current_node, "format") == null)
572 {
573 logger.error("ERROR: classifier does not have a format child");
574 // well then create a format tag
575 Element format_tag = config.createElement("format");
576 current_node.appendChild(format_tag);
577 //current_node = (Node) format_tag;
578 }
579 }
580 }
581 else if(service.equals("AllClassifierBrowse"))
582 {
583 logger.error("Looking for browse");
584 current_node = GSXML.getChildByTagName(current_node, "browse");
585 if(GSXML.getChildByTagName(current_node, "format") == null)
586 {
587 logger.error("ERROR AllClassifierBrowse: all classifiers do not have a format child");
588 // well then create a format tag
589 Element format_tag = config.createElement("format");
590 current_node.appendChild(format_tag);
591 //current_node = (Node) format_tag;
592 }
593 }
594 else
595 {
596 // look in <format> with no attributes
597 logger.error("I presume this is search");
598
599 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
600 for(k=0; k<current_node_list.getLength(); k++)
601 {
602 current_node = current_node_list.item(k);
603 // if current_node has no attributes then break
604 elem = (Element) current_node;
605 if(elem.hasAttribute("name")==false)
606 break;
607 }
608 }
609
610 current_node.replaceChild(config.importNode(format_statement,true), GSXML.getChildByTagName(current_node, "format"));
611
612 // Now convert config document to string for writing to file
613 logger.error("Convert config to string");
614 String new_config = this.converter.getString(config);
615
616 new_config = StringUtils.replace(new_config, "&lt;", "<");
617 new_config = StringUtils.replace(new_config, "&gt;", ">");
618 new_config = StringUtils.replace(new_config, "&quot;", "\"");
619
620 // Write to file (not original! for now)
621 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
622 writer.write(new_config);
623 writer.close();
624 logger.error("All is happy with collection");
625
626 } catch( Exception ex ) {
627 logger.error("There was an exception "+ex);
628
629 StringWriter sw = new StringWriter();
630 PrintWriter pw = new PrintWriter(sw, true);
631 ex.printStackTrace(pw);
632 pw.flush();
633 sw.flush();
634 logger.error(sw.toString());
635 }
636
637 }
638 }
639 else { // unknown type
640 return super.processMessage(request);
641
642 }
643 return response;
644 }
645
646}
647
648
649
650
Note: See TracBrowser for help on using the repository browser.