source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/collection/Collection.java@ 23606

Last change on this file since 23606 was 23606, checked in by sjm84, 13 years ago

Merging the latest Greenstone 3 trunk changes into the 64-bit branch

  • Property svn:keywords set to Author Date Id Revision
File size: 19.6 KB
Line 
1/*
2 * Collection.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.collection;
20
21import org.greenstone.gsdl3.util.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.service.*;
24
25
26// java XML classes we're using
27import org.w3c.dom.Document;
28import org.w3c.dom.Node;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32import java.io.*;
33import java.io.File;
34import java.util.HashMap;
35
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.DocumentBuilderFactory;
38
39import org.xml.sax.*;
40import javax.xml.parsers.SAXParserFactory;
41import javax.xml.parsers.ParserConfigurationException;
42import javax.xml.parsers.SAXParser;
43
44import org.apache.log4j.*;
45
46/**
47 * Represents a collection in Greenstone. A collection is an extension of
48 * a ServiceCluster - it has local data that the services use.
49 *
50 * @author <a href="mailto:[email protected]">Katherine Don</a>
51 * @see ModuleInterface
52 */
53public class Collection
54 extends ServiceCluster {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.collection.Collection.class.getName());
57
58 /** is this collection being tidied */
59 protected boolean useBook = false;
60 /** is this collection public or private */
61 protected boolean is_public = true;
62
63 /** does this collection provide the OAI service */
64 protected boolean has_oai = true;
65 /** time when this collection was built */
66 protected long lastmodified = 0;
67
68 /** An element containing the serviceRackList element of buildConfig.xml, used to determine whether it contains
69 * the OAIPMH serviceRack
70 */
71 //protected Element service_rack_list = null;
72
73 protected XMLTransformer transformer = null;
74 /** same as setClusterName */
75 public void setCollectionName(String name) {
76 setClusterName(name);
77 }
78
/**
 * Creates a collection and replaces the description element with a
 * freshly created collection element owned by this object's document.
 */
public Collection() {
    super();
    // the description of a Collection is a collection element
    this.description = doc.createElement(GSXML.COLLECTION_ELEM);
}
84
85 /**
86 * Configures the collection.
87 *
88 * gsdlHome and collectionName must be set before configure is called.
89 *
90 * the file buildcfg.xml is located in gsdlHome/collect/collectionName
91 * collection metadata is obtained, and services loaded.
92 *
93 * @return true/false on success/fail
94 */
95 public boolean configure() {
96
97 if (this.site_home == null || this.cluster_name== null) {
98 logger.error("Collection: site_home and collection_name must be set before configure called!");
99 return false;
100 }
101
102 Element coll_config_xml = loadCollConfigFile();
103 Element build_config_xml = loadBuildConfigFile();
104
105 if (coll_config_xml==null||build_config_xml==null) {
106 return false;
107 }
108
109 // get the collection type attribute
110 Element search = (Element) GSXML.getChildByTagName(coll_config_xml, GSXML.SEARCH_ELEM);
111 if(search!=null) {
112 col_type = search.getAttribute(GSXML.TYPE_ATT);
113 }
114
115 // process the metadata and display items
116 findAndLoadInfo(coll_config_xml, build_config_xml);
117
118 // now do the services
119 configureServiceRacks(coll_config_xml, build_config_xml);
120
121 return true;
122
123 }
124
125 public boolean useBook() {
126 return useBook;
127 }
128
129 public boolean isPublic() {
130 return is_public;
131 }
132 //used by the OAIReceptionist to find out the earliest datestamp amongst all oai collections in the repository
133 public long getLastmodified() {
134 return lastmodified;
135 }
136 /** whether the service_map in ServiceCluster.java contains the service 'OAIPMH'
137 * 11/06/2007 xiao
138 */
139 public boolean hasOAI() {
140 return has_oai;
141 }
142 /**
143 * load in the collection config file into a DOM Element
144 */
145 protected Element loadCollConfigFile() {
146
147 File coll_config_file = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name));
148
149 if (!coll_config_file.exists()) {
150 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+coll_config_file+" does not exist");
151 return null;
152 }
153 // get the xml for both files
154 Document coll_config_doc = this.converter.getDOM(coll_config_file, CONFIG_ENCODING);
155 Element coll_config_elem = null;
156 if (coll_config_doc != null) {
157 coll_config_elem = coll_config_doc.getDocumentElement();
158 }
159 return coll_config_elem;
160
161 }
162
163 /**
164 * load in the collection build config file into a DOM Element
165 */
166 protected Element loadBuildConfigFile() {
167
168 File build_config_file = new File(GSFile.collectionBuildConfigFile(this.site_home, this.cluster_name));
169 if (!build_config_file.exists()) {
170 logger.error("Collection: couldn't configure collection: "+this.cluster_name+", "+build_config_file+" does not exist");
171 return null;
172 }
173 Document build_config_doc = this.converter.getDOM(build_config_file, CONFIG_ENCODING);
174 Element build_config_elem = null;
175 if (build_config_doc != null) {
176 build_config_elem = build_config_doc.getDocumentElement();
177 }
178
179 lastmodified = build_config_file.lastModified();
180
181 return build_config_elem;
182 }
183
184 /**
185 * find the metadata and display elems from the two config files and add it to the appropriate lists
186 */
187 protected boolean findAndLoadInfo(Element coll_config_xml,
188 Element build_config_xml){
189
190 // metadata
191 Element meta_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
192 addMetadata(meta_list);
193 meta_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
194 addMetadata(meta_list);
195
196 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
197 GSXML.addMetadata(this.doc, meta_list, "httpPath", this.site_http_address+"/collect/"+this.cluster_name);
198 addMetadata(meta_list);
199
200 // display stuff
201 Element display_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER);
202 if (display_list != null) {
203 resolveMacros(display_list);
204 addDisplayItems(display_list);
205 }
206
207 //check whether the html are tidy or not
208 Element import_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.IMPORT_ELEM);
209 if (import_list != null) {
210 Element plugin_list = (Element)GSXML.getChildByTagName(import_list, GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER);
211 addPlugins(plugin_list);
212 if (plugin_list != null){
213 Element plugin_elem = (Element)GSXML.getNamedElement(plugin_list, GSXML.PLUGIN_ELEM, GSXML.NAME_ATT, "HTMLPlug");
214 if (plugin_elem != null) {
215 //get the option
216 Element option_elem = (Element)GSXML.getNamedElement(plugin_elem, GSXML.PARAM_OPTION_ELEM, GSXML.NAME_ATT, "-tidy_html");
217 if (option_elem != null) {
218 useBook = true;
219 }
220 }
221 }
222 }
223 meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
224 if (useBook == true)
225 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "tidy");
226 else
227 GSXML.addMetadata(this.doc, meta_list, "tidyoption", "untidy");
228 addMetadata(meta_list);
229
230 // check whether we are public or not
231 if (meta_list != null) {
232 Element meta_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "public");
233 if (meta_elem != null) {
234 String value = GSXML.getValue(meta_elem).toLowerCase().trim();
235 if (value.equals("false")) {
236 is_public = false;
237 }
238 }
239 }
240 return true;
241
242 }
243
244 protected boolean configureServiceRacks(Element coll_config_xml,
245 Element build_config_xml){
246 clearServices();
247 Element service_list = (Element)GSXML.getChildByTagName(build_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
248 configureServiceRackList(service_list, coll_config_xml);
249
250 // collection Config may also contain manually added service racks
251 service_list = (Element)GSXML.getChildByTagName(coll_config_xml, GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
252 if (service_list != null) {
253 configureServiceRackList(service_list, build_config_xml);
254
255 // Check for oai
256 Element oai_service_rack = GSXML.getNamedElement(service_list, GSXML.SERVICE_CLASS_ELEM, OAIXML.NAME, OAIXML.OAIPMH);
257 if (oai_service_rack == null) {
258 has_oai = false;
259 logger.info("No oai for collection: " + this.cluster_name);
260
261 } else {
262 has_oai = true;
263 }
264 }
265 return true;
266 }
267
268 protected boolean resolveMacros(Element display_list) {
269 if (display_list==null) return false;
270 NodeList displaynodes = display_list.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
271 if (displaynodes.getLength()>0) {
272 String http_site = this.site_http_address;
273 String http_collection = this.site_http_address +"/collect/"+this.cluster_name;
274 for(int k=0; k<displaynodes.getLength(); k++) {
275 Element d = (Element) displaynodes.item(k);
276 String text = GSXML.getNodeText(d);
277 text = text.replaceAll("_httpsite_", http_site);
278 text = text.replaceAll("_httpcollection_", http_collection);
279 GSXML.setNodeText(d, text);
280 }
281 }
282 return true;
283 }
284 /**
285 * do a configure on only part of the collection
286 */
287 protected boolean configureSubset(String subset) {
288
289 // need the coll config files
290 Element coll_config_elem = loadCollConfigFile();
291 Element build_config_elem = loadBuildConfigFile();
292 if (coll_config_elem == null||build_config_elem == null) {
293 // wont be able to do any of the requests
294 return false;
295 }
296
297 if (subset.equals(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER)) {
298 return configureServiceRacks(coll_config_elem, build_config_elem);
299 }
300
301 if (subset.equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER) || subset.equals(GSXML.PLUGIN_ELEM+GSXML.LIST_MODIFIER)) {
302 return findAndLoadInfo(coll_config_elem, build_config_elem);
303
304 }
305
306 logger.error("Collection: cant process system request, configure "+subset);
307 return false;
308 }
309
310 /** handles requests made to the ServiceCluster itself
311 *
312 * @param req - the request Element- <request>
313 * @return the result Element - should be <response>
314 */
315 protected Element processMessage(Element request) {
316
317 Element response = this.doc.createElement(GSXML.RESPONSE_ELEM);
318 response.setAttribute(GSXML.FROM_ATT, this.cluster_name);
319 String type = request.getAttribute(GSXML.TYPE_ATT);
320 String lang = request.getAttribute(GSXML.LANG_ATT);
321 response.setAttribute(GSXML.TYPE_ATT, type);
322
323 if (type.equals(GSXML.REQUEST_TYPE_FORMAT_STRING)) {
324 logger.error("Received format string request");
325
326 String subaction = request.getAttribute("subaction");
327 logger.error("Subaction is " + subaction);
328
329 String service = request.getAttribute("service");
330 logger.error("Service is " + service);
331
332 String classifier = null;
333 if(service.equals("ClassifierBrowse"))
334 {
335 classifier = request.getAttribute("classifier");
336 logger.error("Classifier is " + classifier);
337 }
338
339 Element format_element = (Element) GSXML.getChildByTagName(request, GSXML.FORMAT_STRING_ELEM);
340 String format_string = GSXML.getNodeText(format_element);
341 //Element format_statement = (Element) format_element.getFirstChild();
342
343 //logger.error("Format string: " + format_string);
344 logger.error("Config file location = " + GSFile.collectionConfigFile(this.site_home, this.cluster_name));
345
346 // check for version file
347
348 String directory = new File(GSFile.collectionConfigFile(this.site_home, this.cluster_name)).getParent() + File.separator;
349 logger.error("Directory is " + directory);
350
351 String version_filename = "";
352 if(service.equals("ClassifierBrowse"))
353 version_filename = directory + "browse_"+classifier+"_format_statement_version.txt";
354 else
355 version_filename = directory + "query_format_statement_version.txt";
356
357 File version_file = new File(version_filename);
358 logger.error("Version filename is " + version_filename);
359
360
361 if(subaction.equals("update"))
362 {
363 String version_number = "1";
364 BufferedWriter writer;
365
366 try{
367
368 if(version_file.exists())
369 {
370 // Read version
371 BufferedReader reader = new BufferedReader(new FileReader(version_filename));
372 version_number = reader.readLine();
373 int aInt = Integer.parseInt(version_number) + 1;
374 version_number = Integer.toString(aInt);
375 reader.close();
376 }
377 else{
378 // Create
379 version_file.createNewFile();
380 writer = new BufferedWriter(new FileWriter(version_filename));
381 writer.write(version_number);
382 writer.close();
383 }
384
385 // Write version file
386 String format_statement_filename = "";
387
388 if(service.equals("ClassifierBrowse"))
389 format_statement_filename = directory + "browse_"+classifier+"_format_statement_v" + version_number + ".txt";
390 else
391 format_statement_filename = directory + "query_format_statement_v" + version_number + ".txt";
392
393 logger.error("Format statement filename is " + format_statement_filename);
394
395 // Write format statement
396 //String format_string = GSXML.xmlNodeToString(format_statement);
397 writer = new BufferedWriter(new FileWriter(format_statement_filename));
398 writer.write(format_string);
399 writer.close();
400
401 // Update version number
402 writer = new BufferedWriter(new FileWriter(version_filename));
403 writer.write(version_number);
404 writer.close();
405
406 } catch (IOException e) {
407 logger.error("IO Exception "+e);
408 }
409 }
410
411 if(subaction.equals("save"))
412 {
413 logger.error("SAVE format statement");
414
415 try{
416
417 // Convert format string to a document
418 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
419 DocumentBuilder builder = factory.newDocumentBuilder();
420 //String input = "<html><head><title></title></head><body>" + format_string + "</body></html>";
421 String input = format_string.substring(0,format_string.length()-1)+"</xml>";
422 logger.error(input);
423 InputSource is = new InputSource( new StringReader( input ) );
424 logger.error("About to parse format string");
425 Document format_statement = (Document) builder.parse( is );
426 logger.error("Done parsing format string");
427
428 // open collectionConfig.xml and read in to w3 Document
429 String collection_config = directory + "collectionConfig.xml";
430 Document config = this.converter.getDOM(new File(collection_config), "UTF-8");
431
432 //String tag_name = "";
433 int k;
434 int index;
435 Element elem;
436 Node current_node = GSXML.getChildByTagName(config, "CollectionConfig");
437 NodeList current_node_list;
438
439 if(service.equals("ClassifierBrowse"))
440 {
441 //tag_name = "browse";
442 // if CLX then need to look in <classifier> X then <format>
443 // default is <browse><format>
444
445 current_node = GSXML.getChildByTagName(current_node, "browse");
446
447 // find CLX
448 if(classifier != null)
449 {
450 current_node_list = GSXML.getChildrenByTagName(current_node, "classifier");
451 index = Integer.parseInt(classifier.substring(2)) - 1;
452 // index should be given by X-1
453 current_node = current_node_list.item(index);
454 current_node = GSXML.getChildByTagName(current_node, "format");
455 }
456 else{
457 current_node = GSXML.getChildByTagName(current_node, "format");
458 }
459 }
460 else
461 {
462 // look in <format> with no attributes
463
464 current_node_list = GSXML.getChildrenByTagName(current_node, "search");
465 for(k=0; k<current_node_list.getLength(); k++)
466 {
467 current_node = current_node_list.item(k);
468 // if current_node has no attributes then break
469 elem = (Element) current_node;
470 if(elem.hasAttribute("name")==false)
471 break;
472 }
473 }
474
475 // Current_node should be a format tag
476 elem = (Element) current_node;
477
478 logger.error("Current_node = " + elem.getNodeName());
479
480 // seems we want to remove current child/ren and replace with format_statement's child/ren?
481
482 // remove existing
483 current_node_list = elem.getChildNodes();
484 for(k=0; k<current_node_list.getLength(); k++)
485 {
486 current_node = elem.removeChild(current_node_list.item(k));
487 }
488
489 // append new but we have a string!
490 GSXML.setNodeText(elem, format_string);
491
492 //current_node_list = format_statement.getChildNodes();
493 //for(k=0; k<current_node_list.getLength(); k++)
494 //{
495 // current_node = elem.appendChild(current_node_list.item(k));
496 //}
497
498 //String text = GSXML.getNodeText(elem);
499 //logger.error(text);
500 //text = text.replaceAll("_httpsite_", http_site);
501 //text = text.replaceAll("_httpcollection_", http_collection);
502 //GSXML.setNodeText(d, text);
503
504 // Now convert config document to string for writing to file
505 String new_config = GSXML.xmlNodeToString(config);
506
507 // Write to file (not original! for now)
508 BufferedWriter writer = new BufferedWriter(new FileWriter(collection_config+".new"));
509 writer.write(new_config);
510 writer.close();
511
512 } catch( Exception ex ) {
513 logger.error("There was an exception "+ex);
514
515 StringWriter sw = new StringWriter();
516 PrintWriter pw = new PrintWriter(sw, true);
517 ex.printStackTrace(pw);
518 pw.flush();
519 sw.flush();
520 logger.error(sw.toString());
521 }
522
523 }
524 }
525 else { // unknown type
526 return super.processMessage(request);
527
528 }
529 return response;
530 }
531
532}
533
534
535
536
Note: See TracBrowser for help on using the repository browser.