source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24427

Last change on this file since 24427 was 24427, checked in by sjm84, 13 years ago

More tidying

  • Property svn:keywords set to Author Date Id Revision
File size: 20.7 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25// java XML classes we're using
26import org.w3c.dom.Document;
27import org.w3c.dom.Node;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31import java.io.*;
32import java.io.File;
33import java.util.HashMap;
34import java.util.*;
35
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.DocumentBuilderFactory;
38
39import org.xml.sax.*;
40import javax.xml.parsers.SAXParserFactory;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Represents a collection in Greenstone. A collection is an extension of a
51 * ServiceCluster - it has local data that the services use.
52 *
53 * @author <a href="mailto:[email protected]">Katherine Don</a>
54 * @see ModuleInterface
55 */
56public class Collection extends ServiceCluster
57{
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
60
61 /** true when the HTMLPlugin is configured with -use_realistic_book; reported as tidyoption metadata "tidy" (otherwise "untidy") */
62 protected boolean useBook = false;
63 /** is this collection public or private */
64 protected boolean is_public = true;
65
66 /** does this collection provide the OAI service */
67 protected boolean has_oai = true;
68 /** time when this collection was built */
69 protected long lastmodified = 0;
70 /** earliestDatestamp of this collection. Necessary for OAI */
71 protected long earliestDatestamp = 0;
72
73 /**
74 * An element containing the serviceRackList element of buildConfig.xml,
75 * used to determine whether it contains the OAIPMH serviceRack
76 */
77 //protected Element service_rack_list = null;
78
79 protected XMLTransformer transformer = null;
80
81 /** same as setClusterName */
82 public void setCollectionName(String name)
83 {
84 setClusterName(name);
85 }
86
87 public Collection()
88 {
89 super();
90 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
91
92 }
93
94 /**
95 * Configures the collection.
96 *
97 * gsdlHome and collectionName must be set before configure is called.
98 *
99 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
100 * collection metadata is obtained, and services loaded.
101 *
102 * @return true/false on success/fail
103 */
104 public boolean configure()
105 {
106
107 if (this.site_home == null || this.cluster_name == null)
108 {
109 logger.error("Collection: site_home and collection_name must be set before configure called!");
110 return false;
111 }
112
113 Element coll_config_xml = loadCollConfigFile();
114 Element build_config_xml = loadBuildConfigFile();
115
116 if (coll_config_xml == null || build_config_xml == null)
117 {
118 return false;
119 }
120
121 // get the collection type attribute
122 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
123 if (search != null)
124 {
125 col_type = search.getAttribute(GSXML.TYPE_ATT);
126 }
127
128 Element browse = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.INFODB_ELEM);
129 if (browse != null)
130 {
131 db_type = browse.getAttribute(GSXML.TYPE_ATT);
132 }
133 else
134 {
135 db_type = "gdbm"; //Default database type
136 }
137
138 // process the metadata and display items
139 findAndLoadInfo(coll_config_xml, build_config_xml);
140
141 // now do the services
142 configureServiceRacks(coll_config_xml, build_config_xml);
143
144 return true;
145
146 }
147
148 public boolean useBook()
149 {
150 return useBook;
151 }
152
153 public boolean isPublic()
154 {
155 return is_public;
156 }
157
158 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
159 // amongst all oai collections in the repository. May be useful generally.
160 public long getLastmodified()
161 {
162 return lastmodified;
163 }
164
165 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
166 public long getEarliestDatestamp()
167 {
168 return earliestDatestamp;
169 }
170
171 /**
172 * whether the service_map in ServiceCluster.java contains the service
173 * 'OAIPMH' 11/06/2007 xiao
174 */
175 public boolean hasOAI()
176 {
177 return has_oai;
178 }
179
180 /**
181 * load in the collection config file into a DOM Element
182 */
183 protected Element loadCollConfigFile()
184 {
185
186 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
187
188 if (!coll_config_file.exists())
189 {
190 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", " + coll_config_file + " does not exist");
191 return null;
192 }
193 // get the xml for both files
194 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
195 Element coll_config_elem = null;
196 if (coll_config_doc != null)
197 {
198 coll_config_elem = coll_config_doc.getDocumentElement();
199 }
200 return coll_config_elem;
201
202 }
203
204 /**
205 * load in the collection build config file into a DOM Element
206 */
207 protected Element loadBuildConfigFile()
208 {
209
210 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
211 if (!build_config_file.exists())
212 {
213 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", " + build_config_file + " does not exist");
214 return null;
215 }
216 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
217 Element build_config_elem = null;
218 if (build_config_doc != null)
219 {
220 build_config_elem = build_config_doc.getDocumentElement();
221 }
222
223 lastmodified = build_config_file.lastModified();
224
225 return build_config_elem;
226 }
227
228 /**
229 * find the metadata and display elems from the two config files and add it
230 * to the appropriate lists
231 */
232 protected boolean findAndLoadInfo(Element coll_config_xml, Element build_config_xml)
233 {
234
235 // metadata
236 Element meta_list = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
237 addMetadata(meta_list);
238 meta_list = (Element) GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
239 addMetadata(meta_list);
240
241 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
242 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address + "/collect/" + this.cluster_name);
243 addMetadata(meta_list);
244
245 // display stuff
246 Element display_list = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER);
247 if (display_list != null)
248 {
249 resolveMacros(display_list);
250 addDisplayItems(display_list);
251 }
252
253 //check whether the html are tidy or not
254 Element import_list = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
255 if (import_list != null)
256 {
257 Element plugin_list = (Element) GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM + GSXML.LIST_MODIFIER);
258 addPlugins(plugin_list);
259 if (plugin_list != null)
260 {
261 Element plugin_elem = (Element) GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
262 if (plugin_elem != null)
263 {
264 //get the option
265 Element option_elem = (Element) GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
266 if (option_elem != null)
267 {
268 useBook = true;
269 }
270 }
271 }
272 }
273 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
274 if (useBook == true)
275 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
276 else
277 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
278 addMetadata(meta_list);
279
280 // check whether we are public or not
281 if (meta_list != null)
282 {
283 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
284 if (meta_elem != null)
285 {
286 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
287 if (value.equals("false"))
288 {
289 is_public = false;
290 }
291 }
292 }
293 return true;
294
295 }
296
297 protected boolean configureServiceRacks(Element coll_config_xml, Element build_config_xml)
298 {
299 clearServices();
300 Element service_list = (Element) GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM + GSXML.LIST_MODIFIER);
301 configureServiceRackList(service_list, coll_config_xml);
302
303 // collection Config may also contain manually added service racks
304 service_list = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM + GSXML.LIST_MODIFIER);
305 if (service_list != null)
306 {
307 configureServiceRackList(service_list, build_config_xml);
308
309 // Check for oai
310 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
311 if (oai_service_rack == null)
312 {
313 has_oai = false;
314 logger.info("No oai for collection: " + this.cluster_name);
315
316 }
317 else
318 {
319 has_oai = true;
320
321 // extract earliestDatestamp from the buildconfig.xml for OAI
322 Element metadata_list = (Element) GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
323
324 if (metadata_list != null)
325 {
326 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
327 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
328 for (int i = 0; i < children.getLength(); i++)
329 {
330 Element metadata = (Element) children.item(i);
331 if (metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP))
332 {
333 String earliestDatestampStr = GSXML.getValue(metadata);
334 if (!earliestDatestampStr.equals(""))
335 {
336 earliestDatestamp = Long.parseLong(earliestDatestampStr);
337 }
338 break; // found a metadata element with name=earliestDatestamp in buildconfig
339 }
340 }
341 }
342
343 // If at the end of this, there is no value for earliestDatestamp, print out a warning
344 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
345
346 }
347 }
348 else
349 { // no list of services (no ServiceRackList), so no oai_service_rack either
350 // explicitly set has_oai to false here, since it's initialised to true by default
351 has_oai = false;
352 }
353 return true;
354 }
355
356 protected boolean resolveMacros(Element display_list)
357 {
358 if (display_list == null)
359 return false;
360 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
361 if (displaynodes.getLength() > 0)
362 {
363 String http_site = this.site_http_address;
364 String http_collection = this.site_http_address + "/collect/" + this.cluster_name;
365 for (int k = 0; k < displaynodes.getLength(); k++)
366 {
367 Element d = (Element) displaynodes.item(k);
368 String text = GSXML.getNodeText(d);
369 text = StringUtils.replace(text, "_httpsite_", http_site);
370 text = StringUtils.replace(text, "_httpcollection_", http_collection);
371 GSXML.setNodeText(d, text);
372 }
373 }
374 return true;
375 }
376
377 /**
378 * do a configure on only part of the collection
379 */
380 protected boolean configureSubset(String subset)
381 {
382
383 // need the coll config files
384 Element coll_config_elem = loadCollConfigFile();
385 Element build_config_elem = loadBuildConfigFile();
386 if (coll_config_elem == null || build_config_elem == null)
387 {
388 // wont be able to do any of the requests
389 return false;
390 }
391
392 if (subset.equals(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER))
393 {
394 return configureServiceRacks(coll_config_elem, build_config_elem);
395 }
396
397 if (subset.equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM + GSXML.LIST_MODIFIER))
398 {
399 return findAndLoadInfo(coll_config_elem, build_config_elem);
400
401 }
402
403 logger.error("Collection: cant process system request, configure " + subset);
404 return false;
405 }
406
407 /**
408 * handles requests made to the ServiceCluster itself
409 *
410 * @param req
411 * - the request Element- <request>
412 * @return the result Element - should be <response>
413 */
414 protected Element processMessage(Element request)
415 {
416
417 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
418 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
419 String type = request.getAttribute(GSXML.TYPE_ATT);
420 String lang = request.getAttribute(GSXML.LANG_ATT);
421 response.setAttribute(GSXML.TYPE_ATT, type);
422
423 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING))
424 {
425 String subaction = request.getAttribute("subaction");
426 String service = request.getAttribute("service");
427
428 String classifier = null;
429 if (service.equals("ClassifierBrowse"))
430 {
431 classifier = request.getAttribute("classifier");
432 }
433
434 // check for version file
435 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
436
437 String version_filename = "";
438 if (service.equals("ClassifierBrowse"))
439 version_filename = directory + "browse_" + classifier + "_format_statement_version.txt";
440 else
441 version_filename = directory + "query_format_statement_version.txt";
442
443 File version_file = new File(version_filename);
444
445 if (subaction.equals("update"))
446 {
447 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
448 //String format_string = GSXML.getNodeText(format_element);
449 Element format_statement = (Element) format_element.getFirstChild();
450
451 String version_number = "1";
452 BufferedWriter writer;
453
454 try
455 {
456
457 if (version_file.exists())
458 {
459 // Read version
460 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
461 version_number = reader.readLine();
462 int aInt = Integer.parseInt(version_number) + 1;
463 version_number = Integer.toString(aInt);
464 reader.close();
465 }
466 else
467 {
468 // Create
469 version_file.createNewFile();
470 writer = new BufferedWriter(new FileWriter(version_filename));
471 writer.write(version_number);
472 writer.close();
473 }
474
475 // Write version file
476 String format_statement_filename = "";
477
478 if (service.equals("ClassifierBrowse"))
479 format_statement_filename = directory + "browse_" + classifier + "_format_statement_v" + version_number + ".txt";
480 else
481 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
482
483 // Write format statement
484 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
485 writer = new BufferedWriter(new FileWriter(format_statement_filename));
486 writer.write(format_string);
487 writer.close();
488
489 // Update version number
490 writer = new BufferedWriter(new FileWriter(version_filename));
491 writer.write(version_number);
492 writer.close();
493
494 }
495 catch (IOException e)
496 {
497 logger.error("IO Exception " + e);
498 }
499 }
500
501 if (subaction.equals("saveDocument"))
502 {
503 int k;
504 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
505 //String format_string = GSXML.getNodeText(format_element);
506 // Get display tag
507 Element display_format = (Element) format_element.getFirstChild();
508
509 String format_string = GSXML.xmlNodeToString(display_format);
510 String collection_config = directory + "collectionConfig.xml";
511 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
512
513 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
514
515 // Get display child
516 if (GSXML.getChildByTagName(current_node, "display") == null)
517 {
518 // well then create a format tag
519 Element display_tag = config.createElement("display");
520 current_node = (Node) current_node.appendChild(display_tag);
521 }
522 else
523 {
524 current_node = GSXML.getChildByTagName(current_node, "display");
525 }
526
527 if (GSXML.getChildByTagName(current_node, "format") == null)
528 {
529 // well then create a format tag
530 Element format_tag = config.createElement("format");
531 current_node.appendChild(format_tag);
532 }
533
534 current_node.replaceChild(config.importNode(display_format, true), GSXML.getChildByTagName(current_node, "format"));
535
536 String new_config = this.converter.getString(config);
537
538 new_config = StringUtils.replace(new_config, "&lt;", "<");
539 new_config = StringUtils.replace(new_config, "&gt;", ">");
540 new_config = StringUtils.replace(new_config, "&quot;", "\"");
541
542 try
543 {
544 // Write to file (not original! for now)
545 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config + ".new"));
546 writer.write(new_config);
547 writer.close();
548 }
549 catch (IOException e)
550 {
551 logger.error("IO Exception " + e);
552 }
553 }
554
555 if (subaction.equals("save"))
556 {
557 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
558 Element format_statement = (Element) format_element.getFirstChild();
559
560 try
561 {
562 // open collectionConfig.xml and read in to w3 Document
563 String collection_config = directory + "collectionConfig.xml";
564 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
565
566 //String tag_name = "";
567 int k;
568 int index;
569 Element elem;
570 // Try importing entire tree to this.doc so we can add and remove children at ease
571 //Node current_node = this.doc.importNode(GSXML.getChildByTagName(config, "CollectionConfig"),true);
572 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
573 NodeList current_node_list;
574
575 if (service.equals("ClassifierBrowse"))
576 {
577 //tag_name = "browse";
578 // if CLX then need to look in <classifier> X then <format>
579 // default is <browse><format>
580
581 current_node = GSXML.getChildByTagName(current_node, "browse");
582
583 // find CLX
584 if (classifier != null)
585 {
586 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
587 index = Integer.parseInt(classifier.substring(2)) - 1;
588
589 // index should be given by X-1
590 current_node = current_node_list.item(index);
591 // what if classifier does not have a format tag?
592 if (GSXML.getChildByTagName(current_node, "format") == null)
593 {
594 // well then create a format tag
595 Element format_tag = config.createElement("format");
596 current_node.appendChild(format_tag);
597 }
598 }
599 else
600 {
601 // To support all classifiers, set classifier to null? There is the chance here that the format tag does not exist
602 if (GSXML.getChildByTagName(current_node, "format") == null)
603 {
604 // well then create a format tag
605 Element format_tag = config.createElement("format");
606 current_node.appendChild(format_tag);
607 }
608 }
609 }
610 else if (service.equals("AllClassifierBrowse"))
611 {
612 current_node = GSXML.getChildByTagName(current_node, "browse");
613 if (GSXML.getChildByTagName(current_node, "format") == null)
614 {
615 // well then create a format tag
616 Element format_tag = config.createElement("format");
617 current_node.appendChild(format_tag);
618 }
619 }
620 else
621 {
622 // look in <format> with no attributes
623 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
624 for (k = 0; k < current_node_list.getLength(); k++)
625 {
626 current_node = current_node_list.item(k);
627 // if current_node has no attributes then break
628 elem = (Element) current_node;
629 if (elem.hasAttribute("name") == false)
630 break;
631 }
632 }
633
634 current_node.replaceChild(config.importNode(format_statement, true), GSXML.getChildByTagName(current_node, "format"));
635
636 // Now convert config document to string for writing to file
637 String new_config = this.converter.getString(config);
638
639 new_config = StringUtils.replace(new_config, "&lt;", "<");
640 new_config = StringUtils.replace(new_config, "&gt;", ">");
641 new_config = StringUtils.replace(new_config, "&quot;", "\"");
642
643 // Write to file (not original! for now)
644 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config + ".new"));
645 writer.write(new_config);
646 writer.close();
647
648 }
649 catch (Exception ex)
650 {
651 logger.error("There was an exception " + ex);
652
653 StringWriter sw = new StringWriter();
654 PrintWriter pw = new PrintWriter(sw, true);
655 ex.printStackTrace(pw);
656 pw.flush();
657 sw.flush();
658 logger.error(sw.toString());
659 }
660
661 }
662 }
663 else
664 { // unknown type
665 return super.processMessage(request);
666
667 }
668 return response;
669 }
670
671}
Note: See TracBrowser for help on using the repository browser.