source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24203

Last change on this file since 24203 was 24203, checked in by sjb48, 13 years ago

Working on document-level format editing

  • Property svn:keywords set to Author Date Id Revision
File size: 25.1 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35import java.util.*;
36
37import javax.xml.parsers.DocumentBuilder;
38import javax.xml.parsers.DocumentBuilderFactory;
39
40import org.xml.sax.*;
41import javax.xml.parsers.SAXParserFactory;
42import javax.xml.parsers.ParserConfigurationException;
43import javax.xml.parsers.SAXParser;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51 * Represents a collection in Greenstone. A collection is an extension of
52 * a ServiceCluster - it has local data that the services use.
53 *
54 * @author <a href="mailto:[email protected]">Katherine Don</a>
55 * @see ModuleInterface
56 */
57public class Collection
58 extends ServiceCluster {
59
60 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
61
62 /** set to true by findAndLoadInfo when the HTMLPlugin has the -use_realistic_book option; reported as the "tidyoption" metadata */
63 protected boolean useBook = false;
64 /** is this collection public or private; set to false when the "public" metadata value is "false" */
65 protected boolean is_public = true;
66
67 /** does this collection provide the OAI service; recomputed by configureServiceRacks */
68 protected boolean has_oai = true;
69 /** last-modified time of the build config file, recorded by loadBuildConfigFile */
70 protected long lastmodified = 0;
71 /** earliestDatestamp of this collection, read from the build config metadata. Necessary for OAI */
72 protected long earliestDatestamp = 0;
73
74
75 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
76 * the OAIPMH serviceRack (the field below is currently commented out)
77 */
78 //protected Element service_rack_list = null;
79
80 protected XMLTransformer transformer = null;
81 /** same as setClusterName */
82 public void setCollectionName(String name) {
83 setClusterName(name);
84 }
85
/**
 * Creates an unconfigured collection whose description is a new,
 * empty collection element owned by this object's document.
 */
public Collection() {
    // the superclass no-argument constructor is invoked implicitly
    Element collection_description = this.doc.createElement(GSXML.COLLECTION_ELEM);
    this.description = collection_description;
}
91
92 /**
93 * Configures the collection.
94 *
95 * gsdlHome and collectionName must be set before configure is called.
96 *
97 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
98 * collection metadata is obtained, and services loaded.
99 *
100 * @return true/false on success/fail
101 */
102 public boolean configure() {
103
104 if (this.site_home == null || this.cluster_name== null) {
105 logger.error("Collection: site_home and collection_name must be set before configure called!");
106 return false;
107 }
108
109 Element coll_config_xml = loadCollConfigFile();
110 Element build_config_xml = loadBuildConfigFile();
111
112 if (coll_config_xml==null||build_config_xml==null) {
113 return false;
114 }
115
116 // get the collection type attribute
117 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
118 if(search!=null) {
119 col_type = search.getAttribute(GSXML.TYPE_ATT);
120 }
121
122 // process the metadata and display items
123 findAndLoadInfo(coll_config_xml, build_config_xml);
124
125 // now do the services
126 configureServiceRacks(coll_config_xml, build_config_xml);
127
128 return true;
129
130 }
131
132 public boolean useBook() {
133 return useBook;
134 }
135
136 public boolean isPublic() {
137 return is_public;
138 }
139 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
140 // amongst all oai collections in the repository. May be useful generally.
141 public long getLastmodified() {
142 return lastmodified;
143 }
144 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
145 public long getEarliestDatestamp() {
146 return earliestDatestamp;
147 }
148
149 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
150 * 11/06/2007 xiao
151 */
152 public boolean hasOAI() {
153 return has_oai;
154 }
155 /**
156 * load in the collection config file into a DOM Element
157 */
158 protected Element loadCollConfigFile() {
159
160 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
161
162 if (!coll_config_file.exists()) {
163 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
164 return null;
165 }
166 // get the xml for both files
167 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
168 Element coll_config_elem = null;
169 if (coll_config_doc != null) {
170 coll_config_elem = coll_config_doc.getDocumentElement();
171 }
172 return coll_config_elem;
173
174 }
175
176 /**
177 * load in the collection build config file into a DOM Element
178 */
179 protected Element loadBuildConfigFile() {
180
181 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
182 if (!build_config_file.exists()) {
183 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
184 return null;
185 }
186 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
187 Element build_config_elem = null;
188 if (build_config_doc != null) {
189 build_config_elem = build_config_doc.getDocumentElement();
190 }
191
192 lastmodified = build_config_file.lastModified();
193
194 return build_config_elem;
195 }
196
197 /**
198 * find the metadata and display elems from the two config files and add it to the appropriate lists
199 */
200 protected boolean findAndLoadInfo(Element coll_config_xml,
201 Element build_config_xml){
202
203 // metadata
204 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
205 addMetadata(meta_list);
206 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
207 addMetadata(meta_list);
208
209 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
210 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
211 addMetadata(meta_list);
212
213 // display stuff
214 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
215 if (display_list != null) {
216 resolveMacros(display_list);
217 addDisplayItems(display_list);
218 }
219
220 //check whether the html are tidy or not
221 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
222 if (import_list != null) {
223 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
224 addPlugins(plugin_list);
225 if (plugin_list != null){
226 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
227 if (plugin_elem != null) {
228 //get the option
229 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
230 if (option_elem != null) {
231 useBook = true;
232 }
233 }
234 }
235 }
236 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
237 if (useBook == true)
238 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
239 else
240 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
241 addMetadata(meta_list);
242
243 // check whether we are public or not
244 if (meta_list != null) {
245 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
246 if (meta_elem != null) {
247 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
248 if (value.equals("false")) {
249 is_public = false;
250 }
251 }
252 }
253 return true;
254
255 }
256
257 protected boolean configureServiceRacks(Element coll_config_xml,
258 Element build_config_xml){
259 clearServices();
260 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
261 configureServiceRackList(service_list, coll_config_xml);
262
263 // collection Config may also contain manually added service racks
264 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
265 if (service_list != null) {
266 configureServiceRackList(service_list, build_config_xml);
267
268 // Check for oai
269 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
270 if (oai_service_rack == null) {
271 has_oai = false;
272 logger.info("No oai for collection: " + this.cluster_name);
273
274 } else {
275 has_oai = true;
276
277 // extract earliestDatestamp from the buildconfig.xml for OAI
278 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
279
280 if(metadata_list != null) {
281 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
282 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
283 for (int i = 0; i < children.getLength(); i++) {
284 Element metadata = (Element)children.item(i);
285 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
286 String earliestDatestampStr = GSXML.getValue(metadata);
287 if(!earliestDatestampStr.equals("")) {
288 earliestDatestamp = Long.parseLong(earliestDatestampStr);
289 }
290 break; // found a metadata element with name=earliestDatestamp in buildconfig
291 }
292 }
293 }
294
295 // If at the end of this, there is no value for earliestDatestamp, print out a warning
296 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
297
298 }
299 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
300 // explicitly set has_oai to false here, since it's initialised to true by default
301 has_oai = false;
302 }
303 return true;
304 }
305
306 protected boolean resolveMacros(Element display_list) {
307 if (display_list==null) return false;
308 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
309 if (displaynodes.getLength()>0) {
310 String http_site = this.site_http_address;
311 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
312 for(int k=0; k<displaynodes.getLength(); k++) {
313 Element d = (Element) displaynodes.item(k);
314 String text = GSXML.getNodeText(d);
315 text = StringUtils.replace(text, "_httpsite_", http_site);
316 text = StringUtils.replace(text, "_httpcollection_", http_collection);
317 GSXML.setNodeText(d, text);
318 }
319 }
320 return true;
321 }
322 /**
323 * do a configure on only part of the collection
324 */
325 protected boolean configureSubset(String subset) {
326
327 // need the coll config files
328 Element coll_config_elem = loadCollConfigFile();
329 Element build_config_elem = loadBuildConfigFile();
330 if (coll_config_elem == null||build_config_elem == null) {
331 // wont be able to do any of the requests
332 return false;
333 }
334
335 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
336 return configureServiceRacks(coll_config_elem, build_config_elem);
337 }
338
339 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
340 return findAndLoadInfo(coll_config_elem, build_config_elem);
341
342 }
343
344 logger.error("Collection: cant process system request, configure "+subset);
345 return false;
346 }
347
348 /** handles requests made to the ServiceCluster itself
349 *
350 * @param req - the request Element- <request>
351 * @return the result Element - should be <response>
352 */
353 protected Element processMessage(Element request) {
354
355 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
356 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
357 String type = request.getAttribute(GSXML.TYPE_ATT);
358 String lang = request.getAttribute(GSXML.LANG_ATT);
359 response.setAttribute(GSXML.TYPE_ATT, type);
360
361 logger.error("Collection received a message, attempting to process");
362
363 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
364 logger.error("Received format string request");
365
366 String subaction = request.getAttribute("subaction");
367 logger.error("Subaction is " + subaction);
368
369 String service = request.getAttribute("service");
370 logger.error("Service is " + service);
371
372 String classifier = null;
373 if(service.equals("ClassifierBrowse"))
374 {
375 classifier = request.getAttribute("classifier");
376 logger.error("Classifier is " + classifier);
377 }
378
379 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
380 //String format_string = GSXML.getNodeText(format_element);
381 Element format_statement = (Element) format_element.getFirstChild();
382
383 //logger.error("Format string: " + format_string);
384 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
385
386 // check for version file
387
388 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
389 logger.error("Directory is " + directory);
390
391 String version_filename = "";
392 if(service.equals("ClassifierBrowse"))
393 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
394 else
395 version_filename = directory + "query_format_statement_version.txt";
396
397 File version_file = new File(version_filename);
398 logger.error("Version filename is " + version_filename);
399
400
401 if(subaction.equals("update"))
402 {
403 String version_number = "1";
404 BufferedWriter writer;
405
406 try{
407
408 if(version_file.exists())
409 {
410 // Read version
411 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
412 version_number = reader.readLine();
413 int aInt = Integer.parseInt(version_number) + 1;
414 version_number = Integer.toString(aInt);
415 reader.close();
416 }
417 else{
418 // Create
419 version_file.createNewFile();
420 writer = new BufferedWriter(new FileWriter(version_filename));
421 writer.write(version_number);
422 writer.close();
423 }
424
425 // Write version file
426 String format_statement_filename = "";
427
428 if(service.equals("ClassifierBrowse"))
429 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
430 else
431 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
432
433 logger.error("Format statement filename is " + format_statement_filename);
434
435 // Write format statement
436 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
437 writer = new BufferedWriter(new FileWriter(format_statement_filename));
438 writer.write(format_string);
439 writer.close();
440
441 // Update version number
442 writer = new BufferedWriter(new FileWriter(version_filename));
443 writer.write(version_number);
444 writer.close();
445
446 } catch (IOException e) {
447 logger.error("IO Exception "+e);
448 }
449 }
450
451 if(subaction.equals("save"))
452 {
453 logger.error("SAVE format statement");
454
455 try{
456
457 // Convert format string to a document
458 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
459 //DocumentBuilder builder = factory.newDocumentBuilder();
460 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
461 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
462 //logger.error(input);
463 //InputSource is = new InputSource( new StringReader( input ) );
464 //logger.error("About to parse format string");
465 //Document format_statement = (Document) builder.parse( is );
466 //logger.error("Done parsing format string");
467
468 // open collectionConfig.xml and read in to w3 Document
469 String collection_config = directory + "collectionConfig.xml";
470 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
471
472 //String tag_name = "";
473 int k;
474 int index;
475 Element elem;
476 // Try importing entire tree to this.doc so we can add and remove children at ease
477 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
478 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
479 NodeList current_node_list;
480
481 logger.error("Service is "+service);
482
483 if(service.equals("ClassifierBrowse"))
484 {
485 //tag_name = "browse";
486 // if CLX then need to look in <classifier> X then <format>
487 // default is <browse><format>
488
489 logger.error("Looking for browse");
490 current_node = GSXML.getChildByTagName(current_node, "browse");
491
492 // find CLX
493 if(classifier != null)
494 {
495 logger.error("Classifier is not null");
496 logger.error("Classifier is "+classifier);
497 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
498 index = Integer.parseInt(classifier.substring(2)) - 1;
499 logger.error("classifier index is "+index);
500 // index should be given by X-1
501 current_node = current_node_list.item(index);
502 // what if classifier does not have a format tag?
503 if(GSXML.getChildByTagName(current_node, "format") == null)
504 {
505 logger.error("ERROR: valid classifier but does not have a format child");
506 // well then create a format tag
507 Element format_tag = config.createElement("format");
508 current_node = (Node) current_node.appendChild(format_tag);
509 //current_node = (Node) format_tag;
510 }
511
512 else{
513 current_node = GSXML.getChildByTagName(current_node, "format");
514 }
515
516 }
517 else{
518 logger.error("Classifier is null");
519 // To support all classifiers, set classifier to null? There is the chance here that the format tag does not exist
520 if(GSXML.getChildByTagName(current_node, "format") == null)
521 {
522 logger.error("ERROR: classifier does not have a format child");
523 // well then create a format tag
524 Element format_tag = config.createElement("format");
525 current_node = (Node) current_node.appendChild(format_tag);
526 //current_node = (Node) format_tag;
527 }
528 else
529 current_node = GSXML.getChildByTagName(current_node, "format");
530 }
531 }
532 else if(service.equals("AllClassifierBrowse"))
533 {
534 logger.error("Looking for browse");
535 current_node = GSXML.getChildByTagName(current_node, "browse");
536 if(GSXML.getChildByTagName(current_node, "format") == null)
537 {
538 logger.error("ERROR AllClassifierBrowse: all classifiers do not have a format child");
539 // well then create a format tag
540 Element format_tag = config.createElement("format");
541 current_node = (Node) current_node.appendChild(format_tag);
542 //current_node = (Node) format_tag;
543 }
544 else
545 current_node = GSXML.getChildByTagName(current_node, "format");
546 }
547 else
548 {
549 // look in <format> with no attributes
550 logger.error("I presume this is search");
551
552 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
553 for(k=0; k<current_node_list.getLength(); k++)
554 {
555 current_node = current_node_list.item(k);
556 // if current_node has no attributes then break
557 elem = (Element) current_node;
558 if(elem.hasAttribute("name")==false)
559 break;
560 }
561 }
562
563 // Current_node should be a format tag
564 elem = (Element) current_node;
565
566 logger.error("*Current_node = " + elem.getNodeName());
567
568 // seems we want to remove current child/ren and replace with format_statement's child/ren?
569
570 // remove existing
571 current_node_list = elem.getChildNodes();
572 logger.error("About to remove old children");
573 for(k=0; k<current_node_list.getLength(); k++)
574 {
575 current_node = elem.removeChild(current_node_list.item(k));
576 }
577
578 logger.error("old nodes removed");
579
580 // append new but we have a string!
581 //GSXML.setNodeText(elem, "THIS IS A TEST");
582 //GSXML.setNodeText(elem, format_string);
583
584 current_node_list = format_statement.getChildNodes();
585 for(k=0; k<current_node_list.getLength(); k++)
586 {
587 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
588 //transformed = ((Document)transformed).getDocumentElement();
589 //logger.error("Node type: "+current_node_list.item(k).getNodeType());
590 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
591 elem.appendChild(config.importNode(current_node_list.item(k),true));
592 }
593 logger.error("new nodes added");
594
595 //String text = GSXML.getNodeText(elem);
596 //logger.error(text);
597 //text = text.replaceAll("_httpsite_", http_site);
598 //text = text.replaceAll("_httpcollection_", http_collection);
599 //GSXML.setNodeText(d, text);
600
601 // Now convert config document to string for writing to file
602 logger.error("Convert config to string");
603 String new_config = this.converter.getString(config);
604
605 new_config = StringUtils.replace(new_config, "&lt;", "<");
606 new_config = StringUtils.replace(new_config, "&gt;", ">");
607 new_config = StringUtils.replace(new_config, "&quot;", "\"");
608
609 // Write to file (not original! for now)
610 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
611 writer.write(new_config);
612 writer.close();
613 logger.error("All is happy with collection");
614
615 } catch( Exception ex ) {
616 logger.error("There was an exception "+ex);
617
618 StringWriter sw = new StringWriter();
619 PrintWriter pw = new PrintWriter(sw, true);
620 ex.printStackTrace(pw);
621 pw.flush();
622 sw.flush();
623 logger.error(sw.toString());
624 }
625
626 }
627 }
628 else { // unknown type
629 return super.processMessage(request);
630
631 }
632 return response;
633 }
634
635}
636
637
638
639
Note: See TracBrowser for help on using the repository browser.