source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24135

Last change on this file since 24135 was 24135, checked in by sjb48, 13 years ago

Saving of the format statement to the collectionConfig is now possible. The bug with select boxes where the selected item was not saved has now been fixed.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.2 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35import java.util.*;
36
37import javax.xml.parsers.DocumentBuilder;
38import javax.xml.parsers.DocumentBuilderFactory;
39
40import org.xml.sax.*;
41import javax.xml.parsers.SAXParserFactory;
42import javax.xml.parsers.ParserConfigurationException;
43import javax.xml.parsers.SAXParser;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51 * Represents a collection in Greenstone. A collection is an extension of
52 * a ServiceCluster - it has local data that the services use.
53 *
54 * @author <a href="mailto:[email protected]">Katherine Don</a>
55 * @see ModuleInterface
56 */
57public class Collection
58 extends ServiceCluster {
59
60 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
61
62 /** is this collection being tidied */
63 protected boolean useBook = false;
64 /** is this collection public or private */
65 protected boolean is_public = true;
66
67 /** does this collection provide the OAI service */
68 protected boolean has_oai = true;
69 /** time when this collection was built */
70 protected long lastmodified = 0;
71 /** earliestDatestamp of this collection. Necessary for OAI */
72 protected long earliestDatestamp = 0;
73
74
75 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
76 * the OAIPMH serviceRack
77 */
78 //protected Element service_rack_list = null;
79
80 protected XMLTransformer transformer = null;
81 /** same as setClusterName */
82 public void setCollectionName(String name) {
83 setClusterName(name);
84 }
85
86 public Collection() {
87 super();
88 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
89
90 }
91
92 /**
93 * Configures the collection.
94 *
95 * gsdlHome and collectionName must be set before configure is called.
96 *
97 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
98 * collection metadata is obtained, and services loaded.
99 *
100 * @return true/false on success/fail
101 */
102 public boolean configure() {
103
104 if (this.site_home == null || this.cluster_name== null) {
105 logger.error("Collection: site_home and collection_name must be set before configure called!");
106 return false;
107 }
108
109 Element coll_config_xml = loadCollConfigFile();
110 Element build_config_xml = loadBuildConfigFile();
111
112 if (coll_config_xml==null||build_config_xml==null) {
113 return false;
114 }
115
116 // get the collection type attribute
117 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
118 if(search!=null) {
119 col_type = search.getAttribute(GSXML.TYPE_ATT);
120 }
121
122 // process the metadata and display items
123 findAndLoadInfo(coll_config_xml, build_config_xml);
124
125 // now do the services
126 configureServiceRacks(coll_config_xml, build_config_xml);
127
128 return true;
129
130 }
131
132 public boolean useBook() {
133 return useBook;
134 }
135
136 public boolean isPublic() {
137 return is_public;
138 }
139 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
140 // amongst all oai collections in the repository. May be useful generally.
141 public long getLastmodified() {
142 return lastmodified;
143 }
144 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
145 public long getEarliestDatestamp() {
146 return earliestDatestamp;
147 }
148
149 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
150 * 11/06/2007 xiao
151 */
152 public boolean hasOAI() {
153 return has_oai;
154 }
155 /**
156 * load in the collection config file into a DOM Element
157 */
158 protected Element loadCollConfigFile() {
159
160 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
161
162 if (!coll_config_file.exists()) {
163 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
164 return null;
165 }
166 // get the xml for both files
167 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
168 Element coll_config_elem = null;
169 if (coll_config_doc != null) {
170 coll_config_elem = coll_config_doc.getDocumentElement();
171 }
172 return coll_config_elem;
173
174 }
175
176 /**
177 * load in the collection build config file into a DOM Element
178 */
179 protected Element loadBuildConfigFile() {
180
181 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
182 if (!build_config_file.exists()) {
183 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
184 return null;
185 }
186 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
187 Element build_config_elem = null;
188 if (build_config_doc != null) {
189 build_config_elem = build_config_doc.getDocumentElement();
190 }
191
192 lastmodified = build_config_file.lastModified();
193
194 return build_config_elem;
195 }
196
197 /**
198 * find the metadata and display elems from the two config files and add it to the appropriate lists
199 */
200 protected boolean findAndLoadInfo(Element coll_config_xml,
201 Element build_config_xml){
202
203 // metadata
204 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
205 addMetadata(meta_list);
206 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
207 addMetadata(meta_list);
208
209 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
210 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
211 addMetadata(meta_list);
212
213 // display stuff
214 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
215 if (display_list != null) {
216 resolveMacros(display_list);
217 addDisplayItems(display_list);
218 }
219
220 //check whether the html are tidy or not
221 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
222 if (import_list != null) {
223 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
224 addPlugins(plugin_list);
225 if (plugin_list != null){
226 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
227 if (plugin_elem != null) {
228 //get the option
229 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
230 if (option_elem != null) {
231 useBook = true;
232 }
233 }
234 }
235 }
236 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
237 if (useBook == true)
238 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
239 else
240 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
241 addMetadata(meta_list);
242
243 // check whether we are public or not
244 if (meta_list != null) {
245 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
246 if (meta_elem != null) {
247 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
248 if (value.equals("false")) {
249 is_public = false;
250 }
251 }
252 }
253 return true;
254
255 }
256
257 protected boolean configureServiceRacks(Element coll_config_xml,
258 Element build_config_xml){
259 clearServices();
260 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
261 configureServiceRackList(service_list, coll_config_xml);
262
263 // collection Config may also contain manually added service racks
264 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
265 if (service_list != null) {
266 configureServiceRackList(service_list, build_config_xml);
267
268 // Check for oai
269 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
270 if (oai_service_rack == null) {
271 has_oai = false;
272 logger.info("No oai for collection: " + this.cluster_name);
273
274 } else {
275 has_oai = true;
276
277 // extract earliestDatestamp from the buildconfig.xml for OAI
278 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
279
280 if(metadata_list != null) {
281 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
282 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
283 for (int i = 0; i < children.getLength(); i++) {
284 Element metadata = (Element)children.item(i);
285 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
286 String earliestDatestampStr = GSXML.getValue(metadata);
287 if(!earliestDatestampStr.equals("")) {
288 earliestDatestamp = Long.parseLong(earliestDatestampStr);
289 }
290 break; // found a metadata element with name=earliestDatestamp in buildconfig
291 }
292 }
293 }
294
295 // If at the end of this, there is no value for earliestDatestamp, print out a warning
296 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
297
298 }
299 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
300 // explicitly set has_oai to false here, since it's initialised to true by default
301 has_oai = false;
302 }
303 return true;
304 }
305
306 protected boolean resolveMacros(Element display_list) {
307 if (display_list==null) return false;
308 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
309 if (displaynodes.getLength()>0) {
310 String http_site = this.site_http_address;
311 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
312 for(int k=0; k<displaynodes.getLength(); k++) {
313 Element d = (Element) displaynodes.item(k);
314 String text = GSXML.getNodeText(d);
315 text = StringUtils.replace(text, "_httpsite_", http_site);
316 text = StringUtils.replace(text, "_httpcollection_", http_collection);
317 GSXML.setNodeText(d, text);
318 }
319 }
320 return true;
321 }
322 /**
323 * do a configure on only part of the collection
324 */
325 protected boolean configureSubset(String subset) {
326
327 // need the coll config files
328 Element coll_config_elem = loadCollConfigFile();
329 Element build_config_elem = loadBuildConfigFile();
330 if (coll_config_elem == null||build_config_elem == null) {
331 // wont be able to do any of the requests
332 return false;
333 }
334
335 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
336 return configureServiceRacks(coll_config_elem, build_config_elem);
337 }
338
339 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
340 return findAndLoadInfo(coll_config_elem, build_config_elem);
341
342 }
343
344 logger.error("Collection: cant process system request, configure "+subset);
345 return false;
346 }
347
348 /** handles requests made to the ServiceCluster itself
349 *
350 * @param req - the request Element- <request>
351 * @return the result Element - should be <response>
352 */
353 protected Element processMessage(Element request) {
354
355 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
356 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
357 String type = request.getAttribute(GSXML.TYPE_ATT);
358 String lang = request.getAttribute(GSXML.LANG_ATT);
359 response.setAttribute(GSXML.TYPE_ATT, type);
360
361 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
362 logger.error("Received format string request");
363
364 String subaction = request.getAttribute("subaction");
365 logger.error("Subaction is " + subaction);
366
367 String service = request.getAttribute("service");
368 logger.error("Service is " + service);
369
370 String classifier = null;
371 if(service.equals("ClassifierBrowse"))
372 {
373 classifier = request.getAttribute("classifier");
374 logger.error("Classifier is " + classifier);
375 }
376
377 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
378 //String format_string = GSXML.getNodeText(format_element);
379 Element format_statement = (Element) format_element.getFirstChild();
380
381 //logger.error("Format string: " + format_string);
382 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
383
384 // check for version file
385
386 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
387 logger.error("Directory is " + directory);
388
389 String version_filename = "";
390 if(service.equals("ClassifierBrowse"))
391 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
392 else
393 version_filename = directory + "query_format_statement_version.txt";
394
395 File version_file = new File(version_filename);
396 logger.error("Version filename is " + version_filename);
397
398
399 if(subaction.equals("update"))
400 {
401 String version_number = "1";
402 BufferedWriter writer;
403
404 try{
405
406 if(version_file.exists())
407 {
408 // Read version
409 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
410 version_number = reader.readLine();
411 int aInt = Integer.parseInt(version_number) + 1;
412 version_number = Integer.toString(aInt);
413 reader.close();
414 }
415 else{
416 // Create
417 version_file.createNewFile();
418 writer = new BufferedWriter(new FileWriter(version_filename));
419 writer.write(version_number);
420 writer.close();
421 }
422
423 // Write version file
424 String format_statement_filename = "";
425
426 if(service.equals("ClassifierBrowse"))
427 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
428 else
429 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
430
431 logger.error("Format statement filename is " + format_statement_filename);
432
433 // Write format statement
434 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
435 writer = new BufferedWriter(new FileWriter(format_statement_filename));
436 writer.write(format_string);
437 writer.close();
438
439 // Update version number
440 writer = new BufferedWriter(new FileWriter(version_filename));
441 writer.write(version_number);
442 writer.close();
443
444 } catch (IOException e) {
445 logger.error("IO Exception "+e);
446 }
447 }
448
449 if(subaction.equals("save"))
450 {
451 logger.error("SAVE format statement");
452
453 try{
454
455 // Convert format string to a document
456 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
457 //DocumentBuilder builder = factory.newDocumentBuilder();
458 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
459 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
460 //logger.error(input);
461 //InputSource is = new InputSource( new StringReader( input ) );
462 //logger.error("About to parse format string");
463 //Document format_statement = (Document) builder.parse( is );
464 //logger.error("Done parsing format string");
465
466 // open collectionConfig.xml and read in to w3 Document
467 String collection_config = directory + "collectionConfig.xml";
468 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
469
470 //String tag_name = "";
471 int k;
472 int index;
473 Element elem;
474 // Try importing entire tree to this.doc so we can add and remove children at ease
475 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
476 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
477 NodeList current_node_list;
478
479 logger.error("Service is "+service);
480
481 if(service.equals("ClassifierBrowse"))
482 {
483 //tag_name = "browse";
484 // if CLX then need to look in <classifier> X then <format>
485 // default is <browse><format>
486
487 logger.error("Looking for browse");
488 current_node = GSXML.getChildByTagName(current_node, "browse");
489
490 // find CLX
491 if(classifier != null)
492 {
493 logger.error("Classifier is not null");
494 logger.error("Classifier is "+classifier);
495 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
496 index = Integer.parseInt(classifier.substring(2)) - 1;
497 logger.error("classifier index is "+index);
498 // index should be given by X-1
499 current_node = current_node_list.item(index);
500 // what if classifier does not have a format tag?
501 if(GSXML.getChildByTagName(current_node, "format") == null)
502 {
503 logger.error("ERROR: classifier does not have a format child");
504 // well then create a format tag
505 Element format_tag = config.createElement("format");
506 current_node = (Node) current_node.appendChild(format_tag);
507 //current_node = (Node) format_tag;
508 }
509
510 else{
511 current_node = GSXML.getChildByTagName(current_node, "format");
512 }
513 }
514 else{
515 logger.error("Classifier is null");
516 current_node = GSXML.getChildByTagName(current_node, "format");
517 }
518 }
519 else
520 {
521 // look in <format> with no attributes
522 logger.error("I presume this is search");
523
524 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
525 for(k=0; k<current_node_list.getLength(); k++)
526 {
527 current_node = current_node_list.item(k);
528 // if current_node has no attributes then break
529 elem = (Element) current_node;
530 if(elem.hasAttribute("name")==false)
531 break;
532 }
533 }
534
535 // Current_node should be a format tag
536 elem = (Element) current_node;
537
538 logger.error("Current_node = " + elem.getNodeName());
539
540 // seems we want to remove current child/ren and replace with format_statement's child/ren?
541
542 // remove existing
543 current_node_list = elem.getChildNodes();
544 for(k=0; k<current_node_list.getLength(); k++)
545 {
546 current_node = elem.removeChild(current_node_list.item(k));
547 }
548
549 // append new but we have a string!
550 //GSXML.setNodeText(elem, "THIS IS A TEST");
551 //GSXML.setNodeText(elem, format_string);
552
553 current_node_list = format_statement.getChildNodes();
554 for(k=0; k<current_node_list.getLength(); k++)
555 {
556 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
557 //transformed = ((Document)transformed).getDocumentElement();
558 //logger.error("Node type: "+current_node_list.item(k).getNodeType());
559 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
560 elem.appendChild(config.importNode(current_node_list.item(k),true));
561 }
562
563 //String text = GSXML.getNodeText(elem);
564 //logger.error(text);
565 //text = text.replaceAll("_httpsite_", http_site);
566 //text = text.replaceAll("_httpcollection_", http_collection);
567 //GSXML.setNodeText(d, text);
568
569 // Now convert config document to string for writing to file
570 String new_config = this.converter.getString(config);
571
572 new_config = StringUtils.replace(new_config, "&lt;", "<");
573 new_config = StringUtils.replace(new_config, "&gt;", ">");
574 new_config = StringUtils.replace(new_config, "&quot;", "\"");
575
576 // Write to file (not original! for now)
577 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
578 writer.write(new_config);
579 writer.close();
580
581 } catch( Exception ex ) {
582 logger.error("There was an exception "+ex);
583
584 StringWriter sw = new StringWriter();
585 PrintWriter pw = new PrintWriter(sw, true);
586 ex.printStackTrace(pw);
587 pw.flush();
588 sw.flush();
589 logger.error(sw.toString());
590 }
591
592 }
593 }
594 else { // unknown type
595 return super.processMessage(request);
596
597 }
598 return response;
599 }
600
601}
602
603
604
605
Note: See TracBrowser for help on using the repository browser.