source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 24007

Last change on this file since 24007 was 24007, checked in by sjm84, 13 years ago

Updating this branch to match the latest Greenstone3 changes

  • Property svn:keywords set to Author Date Id Revision
File size: 21.7 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.DocumentBuilderFactory;
38
39import org.xml.sax.*;
40import javax.xml.parsers.SAXParserFactory;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Represents a collection in Greenstone. A collection is an extension of
51 * a ServiceCluster - it has local data that the services use.
52 *
53 * @author <a href="mailto:[email protected]">Katherine Don</a>
54 * @see ModuleInterface
55 */
56public class Collection
57 extends ServiceCluster {
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
60
61 /** is this collection being tidied */
62 protected boolean useBook = false;
63 /** is this collection public or private */
64 protected boolean is_public = true;
65
66 /** does this collection provide the OAI service */
67 protected boolean has_oai = true;
68 /** time when this collection was built */
69 protected long lastmodified = 0;
70 /** earliestDatestamp of this collection. Necessary for OAI */
71 protected long earliestDatestamp = 0;
72
73
74 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
75 * the OAIPMH serviceRack
76 */
77 //protected Element service_rack_list = null;
78
79 protected XMLTransformer transformer = null;
80 /** same as setClusterName */
81 public void setCollectionName(String name) {
82 setClusterName(name);
83 }
84
85 public Collection() {
86 super();
87 this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
88
89 }
90
91 /**
92 * Configures the collection.
93 *
94 * gsdlHome and collectionName must be set before configure is called.
95 *
96 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
97 * collection metadata is obtained, and services loaded.
98 *
99 * @return true/false on success/fail
100 */
101 public boolean configure() {
102
103 if (this.site_home == null || this.cluster_name== null) {
104 logger.error("Collection: site_home and collection_name must be set before configure called!");
105 return false;
106 }
107
108 Element coll_config_xml = loadCollConfigFile();
109 Element build_config_xml = loadBuildConfigFile();
110
111 if (coll_config_xml==null||build_config_xml==null) {
112 return false;
113 }
114
115 // get the collection type attribute
116 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
117 if(search!=null) {
118 col_type = search.getAttribute(GSXML.TYPE_ATT);
119 }
120
121 // process the metadata and display items
122 findAndLoadInfo(coll_config_xml, build_config_xml);
123
124 // now do the services
125 configureServiceRacks(coll_config_xml, build_config_xml);
126
127 return true;
128
129 }
130
131 public boolean useBook() {
132 return useBook;
133 }
134
135 public boolean isPublic() {
136 return is_public;
137 }
138 // Not used anymore by the OAIReceptionist to find out the earliest datestamp
139 // amongst all oai collections in the repository. May be useful generally.
140 public long getLastmodified() {
141 return lastmodified;
142 }
143 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
144 public long getEarliestDatestamp() {
145 return earliestDatestamp;
146 }
147
148 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
149 * 11/06/2007 xiao
150 */
151 public boolean hasOAI() {
152 return has_oai;
153 }
154 /**
155 * load in the collection config file into a DOM Element
156 */
157 protected Element loadCollConfigFile() {
158
159 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
160
161 if (!coll_config_file.exists()) {
162 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
163 return null;
164 }
165 // get the xml for both files
166 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
167 Element coll_config_elem = null;
168 if (coll_config_doc != null) {
169 coll_config_elem = coll_config_doc.getDocumentElement();
170 }
171 return coll_config_elem;
172
173 }
174
175 /**
176 * load in the collection build config file into a DOM Element
177 */
178 protected Element loadBuildConfigFile() {
179
180 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
181 if (!build_config_file.exists()) {
182 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
183 return null;
184 }
185 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
186 Element build_config_elem = null;
187 if (build_config_doc != null) {
188 build_config_elem = build_config_doc.getDocumentElement();
189 }
190
191 lastmodified = build_config_file.lastModified();
192
193 return build_config_elem;
194 }
195
196 /**
197 * find the metadata and display elems from the two config files and add it to the appropriate lists
198 */
199 protected boolean findAndLoadInfo(Element coll_config_xml,
200 Element build_config_xml){
201
202 // metadata
203 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
204 addMetadata(meta_list);
205 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
206 addMetadata(meta_list);
207
208 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
209 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
210 addMetadata(meta_list);
211
212 // display stuff
213 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
214 if (display_list != null) {
215 resolveMacros(display_list);
216 addDisplayItems(display_list);
217 }
218
219 //check whether the html are tidy or not
220 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
221 if (import_list != null) {
222 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
223 addPlugins(plugin_list);
224 if (plugin_list != null){
225 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlugin");
226 if (plugin_elem != null) {
227 //get the option
228 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-use_realistic_book");
229 if (option_elem != null) {
230 useBook = true;
231 }
232 }
233 }
234 }
235 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
236 if (useBook == true)
237 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
238 else
239 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
240 addMetadata(meta_list);
241
242 // check whether we are public or not
243 if (meta_list != null) {
244 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
245 if (meta_elem != null) {
246 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
247 if (value.equals("false")) {
248 is_public = false;
249 }
250 }
251 }
252 return true;
253
254 }
255
256 protected boolean configureServiceRacks(Element coll_config_xml,
257 Element build_config_xml){
258 clearServices();
259 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
260 configureServiceRackList(service_list, coll_config_xml);
261
262 // collection Config may also contain manually added service racks
263 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
264 if (service_list != null) {
265 configureServiceRackList(service_list, build_config_xml);
266
267 // Check for oai
268 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
269 if (oai_service_rack == null) {
270 has_oai = false;
271 logger.info("No oai for collection: " + this.cluster_name);
272
273 } else {
274 has_oai = true;
275
276 // extract earliestDatestamp from the buildconfig.xml for OAI
277 Element metadata_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
278
279 if(metadata_list != null) {
280 NodeList children = metadata_list.getElementsByTagName(GSXML.METADATA_ELEM);
281 // can't do getChildNodes(), because whitespace, such as newlines, creates Text nodes
282 for (int i = 0; i < children.getLength(); i++) {
283 Element metadata = (Element)children.item(i);
284 if(metadata.getAttribute(GSXML.NAME_ATT).equals(OAIXML.EARLIEST_DATESTAMP)) {
285 String earliestDatestampStr = GSXML.getValue(metadata);
286 if(!earliestDatestampStr.equals("")) {
287 earliestDatestamp = Long.parseLong(earliestDatestampStr);
288 }
289 break; // found a metadata element with name=earliestDatestamp in buildconfig
290 }
291 }
292 }
293
294 // If at the end of this, there is no value for earliestDatestamp, print out a warning
295 logger.warn("No earliestDatestamp in buildConfig.xml for collection: " + this.cluster_name + ". Defaulting to 0.");
296
297 }
298 } else { // no list of services (no ServiceRackList), so no oai_service_rack either
299 // explicitly set has_oai to false here, since it's initialised to true by default
300 has_oai = false;
301 }
302 return true;
303 }
304
305 protected boolean resolveMacros(Element display_list) {
306 if (display_list==null) return false;
307 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
308 if (displaynodes.getLength()>0) {
309 String http_site = this.site_http_address;
310 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
311 for(int k=0; k<displaynodes.getLength(); k++) {
312 Element d = (Element) displaynodes.item(k);
313 String text = GSXML.getNodeText(d);
314 text = StringUtils.replace(text, "_httpsite_", http_site);
315 text = StringUtils.replace(text, "_httpcollection_", http_collection);
316 GSXML.setNodeText(d, text);
317 }
318 }
319 return true;
320 }
321 /**
322 * do a configure on only part of the collection
323 */
324 protected boolean configureSubset(String subset) {
325
326 // need the coll config files
327 Element coll_config_elem = loadCollConfigFile();
328 Element build_config_elem = loadBuildConfigFile();
329 if (coll_config_elem == null||build_config_elem == null) {
330 // wont be able to do any of the requests
331 return false;
332 }
333
334 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
335 return configureServiceRacks(coll_config_elem, build_config_elem);
336 }
337
338 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
339 return findAndLoadInfo(coll_config_elem, build_config_elem);
340
341 }
342
343 logger.error("Collection: cant process system request, configure "+subset);
344 return false;
345 }
346
347 /** handles requests made to the ServiceCluster itself
348 *
349 * @param req - the request Element- <request>
350 * @return the result Element - should be <response>
351 */
352 protected Element processMessage(Element request) {
353
354 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
355 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
356 String type = request.getAttribute(GSXML.TYPE_ATT);
357 String lang = request.getAttribute(GSXML.LANG_ATT);
358 response.setAttribute(GSXML.TYPE_ATT, type);
359
360 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
361 logger.error("Received format string request");
362
363 String subaction = request.getAttribute("subaction");
364 logger.error("Subaction is " + subaction);
365
366 String service = request.getAttribute("service");
367 logger.error("Service is " + service);
368
369 String classifier = null;
370 if(service.equals("ClassifierBrowse"))
371 {
372 classifier = request.getAttribute("classifier");
373 logger.error("Classifier is " + classifier);
374 }
375
376 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
377 //String format_string = GSXML.getNodeText(format_element);
378 Element format_statement = (Element) format_element.getFirstChild();
379
380 //logger.error("Format string: " + format_string);
381 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
382
383 // check for version file
384
385 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
386 logger.error("Directory is " + directory);
387
388 String version_filename = "";
389 if(service.equals("ClassifierBrowse"))
390 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
391 else
392 version_filename = directory + "query_format_statement_version.txt";
393
394 File version_file = new File(version_filename);
395 logger.error("Version filename is " + version_filename);
396
397
398 if(subaction.equals("update"))
399 {
400 String version_number = "1";
401 BufferedWriter writer;
402
403 try{
404
405 if(version_file.exists())
406 {
407 // Read version
408 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
409 version_number = reader.readLine();
410 int aInt = Integer.parseInt(version_number) + 1;
411 version_number = Integer.toString(aInt);
412 reader.close();
413 }
414 else{
415 // Create
416 version_file.createNewFile();
417 writer = new BufferedWriter(new FileWriter(version_filename));
418 writer.write(version_number);
419 writer.close();
420 }
421
422 // Write version file
423 String format_statement_filename = "";
424
425 if(service.equals("ClassifierBrowse"))
426 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
427 else
428 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
429
430 logger.error("Format statement filename is " + format_statement_filename);
431
432 // Write format statement
433 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
434 writer = new BufferedWriter(new FileWriter(format_statement_filename));
435 writer.write(format_string);
436 writer.close();
437
438 // Update version number
439 writer = new BufferedWriter(new FileWriter(version_filename));
440 writer.write(version_number);
441 writer.close();
442
443 } catch (IOException e) {
444 logger.error("IO Exception "+e);
445 }
446 }
447
448 if(subaction.equals("save"))
449 {
450 logger.error("SAVE format statement");
451
452 try{
453
454 // Convert format string to a document
455 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
456 //DocumentBuilder builder = factory.newDocumentBuilder();
457 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
458 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
459 //logger.error(input);
460 //InputSource is = new InputSource( new StringReader( input ) );
461 //logger.error("About to parse format string");
462 //Document format_statement = (Document) builder.parse( is );
463 //logger.error("Done parsing format string");
464
465 // open collectionConfig.xml and read in to w3 Document
466 String collection_config = directory + "collectionConfig.xml";
467 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
468
469 //String tag_name = "";
470 int k;
471 int index;
472 Element elem;
473 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
474 NodeList current_node_list;
475
476 if(service.equals("ClassifierBrowse"))
477 {
478 //tag_name = "browse";
479 // if CLX then need to look in <classifier> X then <format>
480 // default is <browse><format>
481
482 current_node = GSXML.getChildByTagName(current_node, "browse");
483
484 // find CLX
485 if(classifier != null)
486 {
487 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
488 index = Integer.parseInt(classifier.substring(2)) - 1;
489 // index should be given by X-1
490 current_node = current_node_list.item(index);
491 current_node = GSXML.getChildByTagName(current_node, "format");
492 }
493 else{
494 current_node = GSXML.getChildByTagName(current_node, "format");
495 }
496 }
497 else
498 {
499 // look in <format> with no attributes
500
501 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
502 for(k=0; k<current_node_list.getLength(); k++)
503 {
504 current_node = current_node_list.item(k);
505 // if current_node has no attributes then break
506 elem = (Element) current_node;
507 if(elem.hasAttribute("name")==false)
508 break;
509 }
510 }
511
512 // Current_node should be a format tag
513 elem = (Element) current_node;
514
515 logger.error("Current_node = " + elem.getNodeName());
516
517 // seems we want to remove current child/ren and replace with format_statement's child/ren?
518
519 // remove existing
520 current_node_list = elem.getChildNodes();
521 for(k=0; k<current_node_list.getLength(); k++)
522 {
523 current_node = elem.removeChild(current_node_list.item(k));
524 }
525
526 // append new but we have a string!
527 //GSXML.setNodeText(elem, "THIS IS A TEST");
528 //GSXML.setNodeText(elem, format_string);
529
530 current_node_list = format_statement.getChildNodes();
531 for(k=0; k<current_node_list.getLength(); k++)
532 {
533 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
534 //transformed = ((Document)transformed).getDocumentElement();
535 logger.error("Node type: "+current_node_list.item(k).getNodeType());
536 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
537 current_node = elem.appendChild(this.doc.importNode(current_node_list.item(k),true));
538 }
539
540 //String text = GSXML.getNodeText(elem);
541 //logger.error(text);
542 //text = text.replaceAll("_httpsite_", http_site);
543 //text = text.replaceAll("_httpcollection_", http_collection);
544 //GSXML.setNodeText(d, text);
545
546 // Now convert config document to string for writing to file
547 String new_config = this.converter.getString(config);
548
549 // Write to file (not original! for now)
550 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
551 writer.write(new_config);
552 writer.close();
553
554 } catch( Exception ex ) {
555 logger.error("There was an exception "+ex);
556
557 StringWriter sw = new StringWriter();
558 PrintWriter pw = new PrintWriter(sw, true);
559 ex.printStackTrace(pw);
560 pw.flush();
561 sw.flush();
562 logger.error(sw.toString());
563 }
564
565 }
566 }
567 else { // unknown type
568 return super.processMessage(request);
569
570 }
571 return response;
572 }
573
574}
575
576
577
578
Note: See TracBrowser for help on using the repository browser.