source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 23795

Last change on this file since 23795 was 23795, checked in by davidb, 13 years ago

Changes to take advantage of the more efficient Apache Commons string manipulation classes.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.1 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.DocumentBuilderFactory;
38
39import org.xml.sax.*;
40import javax.xml.parsers.SAXParserFactory;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Represents a collection in Greenstone. A collection is an extension of
51 * a ServiceCluster - it has local data that the services use.
52 *
53 * @author <a href="mailto:[email protected]">Katherine Don</a>
54 * @see ModuleInterface
55 */
56public class Collection
57 extends ServiceCluster {
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
60
61 /** is this collection being tidied */
62 protected boolean useBook = false;
63 /** is this collection public or private */
64 protected boolean is_public = true;
65
66 /** does this collection provide the OAI service */
67 protected boolean has_oai = true;
68 /** time when this collection was built */
69 protected long lastmodified = 0;
70
71 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
72 * the OAIPMH serviceRack
73 */
74 //protected Element service_rack_list = null;
75
76 protected XMLTransformer transformer = null;
77 /** same as setClusterName */
78 public void setCollectionName(String name) {
79 setClusterName(name);
80 }
81
/**
 * Creates an unconfigured collection whose description is a fresh,
 * empty collection element owned by this object's document.
 */
public Collection() {
    // the superclass no-argument constructor is invoked implicitly
    this.description = this.doc.createElement(GSXML.COLLECTION_ELEM);
}
87
88 /**
89 * Configures the collection.
90 *
91 * gsdlHome and collectionName must be set before configure is called.
92 *
93 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
94 * collection metadata is obtained, and services loaded.
95 *
96 * @return true/false on success/fail
97 */
98 public boolean configure() {
99
100 if (this.site_home == null || this.cluster_name== null) {
101 logger.error("Collection: site_home and collection_name must be set before configure called!");
102 return false;
103 }
104
105 Element coll_config_xml = loadCollConfigFile();
106 Element build_config_xml = loadBuildConfigFile();
107
108 if (coll_config_xml==null||build_config_xml==null) {
109 return false;
110 }
111
112 // get the collection type attribute
113 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
114 if(search!=null) {
115 col_type = search.getAttribute(GSXML.TYPE_ATT);
116 }
117
118 // process the metadata and display items
119 findAndLoadInfo(coll_config_xml, build_config_xml);
120
121 // now do the services
122 configureServiceRacks(coll_config_xml, build_config_xml);
123
124 return true;
125
126 }
127
128 public boolean useBook() {
129 return useBook;
130 }
131
132 public boolean isPublic() {
133 return is_public;
134 }
135 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
136 public long getLastmodified() {
137 return lastmodified;
138 }
139 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
140 * 11/06/2007 xiao
141 */
142 public boolean hasOAI() {
143 return has_oai;
144 }
145 /**
146 * load in the collection config file into a DOM Element
147 */
148 protected Element loadCollConfigFile() {
149
150 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
151
152 if (!coll_config_file.exists()) {
153 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
154 return null;
155 }
156 // get the xml for both files
157 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
158 Element coll_config_elem = null;
159 if (coll_config_doc != null) {
160 coll_config_elem = coll_config_doc.getDocumentElement();
161 }
162 return coll_config_elem;
163
164 }
165
166 /**
167 * load in the collection build config file into a DOM Element
168 */
169 protected Element loadBuildConfigFile() {
170
171 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
172 if (!build_config_file.exists()) {
173 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
174 return null;
175 }
176 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
177 Element build_config_elem = null;
178 if (build_config_doc != null) {
179 build_config_elem = build_config_doc.getDocumentElement();
180 }
181
182 lastmodified = build_config_file.lastModified();
183
184 return build_config_elem;
185 }
186
187 /**
188 * find the metadata and display elems from the two config files and add it to the appropriate lists
189 */
190 protected boolean findAndLoadInfo(Element coll_config_xml,
191 Element build_config_xml){
192
193 // metadata
194 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
195 addMetadata(meta_list);
196 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
197 addMetadata(meta_list);
198
199 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
200 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
201 addMetadata(meta_list);
202
203 // display stuff
204 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
205 if (display_list != null) {
206 resolveMacros(display_list);
207 addDisplayItems(display_list);
208 }
209
210 //check whether the html are tidy or not
211 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
212 if (import_list != null) {
213 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
214 addPlugins(plugin_list);
215 if (plugin_list != null){
216 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlug");
217 if (plugin_elem != null) {
218 //get the option
219 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-tidy_html");
220 if (option_elem != null) {
221 useBook = true;
222 }
223 }
224 }
225 }
226 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
227 if (useBook == true)
228 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
229 else
230 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
231 addMetadata(meta_list);
232
233 // check whether we are public or not
234 if (meta_list != null) {
235 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
236 if (meta_elem != null) {
237 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
238 if (value.equals("false")) {
239 is_public = false;
240 }
241 }
242 }
243 return true;
244
245 }
246
247 protected boolean configureServiceRacks(Element coll_config_xml,
248 Element build_config_xml){
249 clearServices();
250 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
251 configureServiceRackList(service_list, coll_config_xml);
252
253 // collection Config may also contain manually added service racks
254 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
255 if (service_list != null) {
256 configureServiceRackList(service_list, build_config_xml);
257
258 // Check for oai
259 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
260 if (oai_service_rack == null) {
261 has_oai = false;
262 logger.info("No oai for collection: " + this.cluster_name);
263
264 } else {
265 has_oai = true;
266 }
267 }
268 return true;
269 }
270
271 protected boolean resolveMacros(Element display_list) {
272 if (display_list==null) return false;
273 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
274 if (displaynodes.getLength()>0) {
275 String http_site = this.site_http_address;
276 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
277 for(int k=0; k<displaynodes.getLength(); k++) {
278 Element d = (Element) displaynodes.item(k);
279 String text = GSXML.getNodeText(d);
280 text = StringUtils.replace(text, "_httpsite_", http_site);
281 text = StringUtils.replace(text, "_httpcollection_", http_collection);
282 GSXML.setNodeText(d, text);
283 }
284 }
285 return true;
286 }
287 /**
288 * do a configure on only part of the collection
289 */
290 protected boolean configureSubset(String subset) {
291
292 // need the coll config files
293 Element coll_config_elem = loadCollConfigFile();
294 Element build_config_elem = loadBuildConfigFile();
295 if (coll_config_elem == null||build_config_elem == null) {
296 // wont be able to do any of the requests
297 return false;
298 }
299
300 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
301 return configureServiceRacks(coll_config_elem, build_config_elem);
302 }
303
304 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
305 return findAndLoadInfo(coll_config_elem, build_config_elem);
306
307 }
308
309 logger.error("Collection: cant process system request, configure "+subset);
310 return false;
311 }
312
313 /** handles requests made to the ServiceCluster itself
314 *
315 * @param req - the request Element- <request>
316 * @return the result Element - should be <response>
317 */
318 protected Element processMessage(Element request) {
319
320 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
321 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
322 String type = request.getAttribute(GSXML.TYPE_ATT);
323 String lang = request.getAttribute(GSXML.LANG_ATT);
324 response.setAttribute(GSXML.TYPE_ATT, type);
325
326 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
327 logger.error("Received format string request");
328
329 String subaction = request.getAttribute("subaction");
330 logger.error("Subaction is " + subaction);
331
332 String service = request.getAttribute("service");
333 logger.error("Service is " + service);
334
335 String classifier = null;
336 if(service.equals("ClassifierBrowse"))
337 {
338 classifier = request.getAttribute("classifier");
339 logger.error("Classifier is " + classifier);
340 }
341
342 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
343 //String format_string = GSXML.getNodeText(format_element);
344 Element format_statement = (Element) format_element.getFirstChild();
345
346 //logger.error("Format string: " + format_string);
347 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
348
349 // check for version file
350
351 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
352 logger.error("Directory is " + directory);
353
354 String version_filename = "";
355 if(service.equals("ClassifierBrowse"))
356 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
357 else
358 version_filename = directory + "query_format_statement_version.txt";
359
360 File version_file = new File(version_filename);
361 logger.error("Version filename is " + version_filename);
362
363
364 if(subaction.equals("update"))
365 {
366 String version_number = "1";
367 BufferedWriter writer;
368
369 try{
370
371 if(version_file.exists())
372 {
373 // Read version
374 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
375 version_number = reader.readLine();
376 int aInt = Integer.parseInt(version_number) + 1;
377 version_number = Integer.toString(aInt);
378 reader.close();
379 }
380 else{
381 // Create
382 version_file.createNewFile();
383 writer = new BufferedWriter(new FileWriter(version_filename));
384 writer.write(version_number);
385 writer.close();
386 }
387
388 // Write version file
389 String format_statement_filename = "";
390
391 if(service.equals("ClassifierBrowse"))
392 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
393 else
394 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
395
396 logger.error("Format statement filename is " + format_statement_filename);
397
398 // Write format statement
399 String format_string = this.converter.getString(format_statement); //GSXML.xmlNodeToString(format_statement);
400 writer = new BufferedWriter(new FileWriter(format_statement_filename));
401 writer.write(format_string);
402 writer.close();
403
404 // Update version number
405 writer = new BufferedWriter(new FileWriter(version_filename));
406 writer.write(version_number);
407 writer.close();
408
409 } catch (IOException e) {
410 logger.error("IO Exception "+e);
411 }
412 }
413
414 if(subaction.equals("save"))
415 {
416 logger.error("SAVE format statement");
417
418 try{
419
420 // Convert format string to a document
421 //DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
422 //DocumentBuilder builder = factory.newDocumentBuilder();
423 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
424 //String input = format_string.substring(0,format_string.length()-1)+"</xml>";
425 //logger.error(input);
426 //InputSource is = new InputSource( new StringReader( input ) );
427 //logger.error("About to parse format string");
428 //Document format_statement = (Document) builder.parse( is );
429 //logger.error("Done parsing format string");
430
431 // open collectionConfig.xml and read in to w3 Document
432 String collection_config = directory + "collectionConfig.xml";
433 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
434
435 //String tag_name = "";
436 int k;
437 int index;
438 Element elem;
439 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
440 NodeList current_node_list;
441
442 if(service.equals("ClassifierBrowse"))
443 {
444 //tag_name = "browse";
445 // if CLX then need to look in <classifier> X then <format>
446 // default is <browse><format>
447
448 current_node = GSXML.getChildByTagName(current_node, "browse");
449
450 // find CLX
451 if(classifier != null)
452 {
453 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
454 index = Integer.parseInt(classifier.substring(2)) - 1;
455 // index should be given by X-1
456 current_node = current_node_list.item(index);
457 current_node = GSXML.getChildByTagName(current_node, "format");
458 }
459 else{
460 current_node = GSXML.getChildByTagName(current_node, "format");
461 }
462 }
463 else
464 {
465 // look in <format> with no attributes
466
467 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
468 for(k=0; k<current_node_list.getLength(); k++)
469 {
470 current_node = current_node_list.item(k);
471 // if current_node has no attributes then break
472 elem = (Element) current_node;
473 if(elem.hasAttribute("name")==false)
474 break;
475 }
476 }
477
478 // Current_node should be a format tag
479 elem = (Element) current_node;
480
481 logger.error("Current_node = " + elem.getNodeName());
482
483 // seems we want to remove current child/ren and replace with format_statement's child/ren?
484
485 // remove existing
486 current_node_list = elem.getChildNodes();
487 for(k=0; k<current_node_list.getLength(); k++)
488 {
489 current_node = elem.removeChild(current_node_list.item(k));
490 }
491
492 // append new but we have a string!
493 //GSXML.setNodeText(elem, "THIS IS A TEST");
494 //GSXML.setNodeText(elem, format_string);
495
496 current_node_list = format_statement.getChildNodes();
497 for(k=0; k<current_node_list.getLength(); k++)
498 {
499 //if(transformed.getNodeType() == Node.DOCUMENT_NODE)
500 //transformed = ((Document)transformed).getDocumentElement();
501 logger.error("Node type: "+current_node_list.item(k).getNodeType());
502 if(current_node_list.item(k).getNodeType() != Node.PROCESSING_INSTRUCTION_NODE)
503 current_node = elem.appendChild(this.doc.importNode(current_node_list.item(k),true));
504 }
505
506 //String text = GSXML.getNodeText(elem);
507 //logger.error(text);
508 //text = text.replaceAll("_httpsite_", http_site);
509 //text = text.replaceAll("_httpcollection_", http_collection);
510 //GSXML.setNodeText(d, text);
511
512 // Now convert config document to string for writing to file
513 String new_config = this.converter.getString(config);
514
515 // Write to file (not original! for now)
516 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
517 writer.write(new_config);
518 writer.close();
519
520 } catch( Exception ex ) {
521 logger.error("There was an exception "+ex);
522
523 StringWriter sw = new StringWriter();
524 PrintWriter pw = new PrintWriter(sw, true);
525 ex.printStackTrace(pw);
526 pw.flush();
527 sw.flush();
528 logger.error(sw.toString());
529 }
530
531 }
532 }
533 else { // unknown type
534 return super.processMessage(request);
535
536 }
537 return response;
538 }
539
540}
541
542
543
544
Note: See TracBrowser for help on using the repository browser.