source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8774

Last change on this file since 8774 was 8774, checked in by schweer, 19 years ago

abort if database couldn't be initialised

  • Property svn:keywords set to Author Date Id Revision
File size: 18.0 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import java.sql.*;
22
23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
35import org.greenstone.gsdl3.gs3build.database.*;
36import org.greenstone.gsdl3.gs3build.collection.*;
37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
40
41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
42import org.greenstone.gsdl3.gs3build.util.URLTools;
43
44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
46import org.greenstone.gsdl3.util.XMLConverter;
47
48/**
49 * Store and hold collection-level configuration information for a collection.
50 * This should be used by BuildManager to work out which classes, etc. to load
51 * at build time, and as a repository for the collection-level metadata, and
52 * a means of loading and saving the same to a file or database, as is seen
53 * fit in the final development of gs3.
54 */
55
56public class CollectionManager
57{
58 GregorianCalendar lastBuildDate; // date of the most recent build; read from the "build" table by the constructor and replaced by startBuild()
59 String adminEmail; // the email address of the administrator of the
60 // collection
61 int buildDocNo; // next document sequence number; used to generate document identifiers
62 CollectionMetadata metadata; // collection-level metadata
63 GS3SQLConnection database; // the database to store everything in
64 String collectionHome; // base directory of the collection, from GSFile.collectionBaseDir()
65 String siteHome; // home directory of the site, from GSFile.siteHome()
66 String collectionName; // the collection name as passed to the constructor
67 String qualifiedCollectionName; // site + "_" + collection; used as the database name
68
69 BuildManager buildManager; // supplied through setBuildManager(); receives the recognisers, indexers and classifiers
70
71 class CollectionClassifier
72 { URL file;
73 String type;
74 List fields;
75 String sort;
76 String className;
77
78 public CollectionClassifier(URL parentURL, String type, String className, Node node)
79 { this.type = type;
80 this.className = className;
81 this.fields = new ArrayList();
82
83 NodeList children = node.getChildNodes();
84 for (int c = 0; c < children.getLength(); c ++) {
85 Node child = children.item(c);
86
87 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
88 String name = child.getNodeName();
89
90 if (name.equals("file")) {
91 NamedNodeMap atts = children.item(c).getAttributes();
92 Node attribute = atts.getNamedItem("URL");
93 String urlString = attribute.getNodeValue();
94 if (urlString == null)
95 continue;
96
97 System.out.println("Path is" + parentURL.getPath());
98 System.out.println("Host is" + parentURL.getHost());
99 System.out.println(urlString);
100
101 try {
102 URL url = new URL(parentURL, urlString);
103 this.file = url;
104 System.out.println(url);
105 }
106 catch (java.net.MalformedURLException malEx) {
107 System.out.println(malEx);
108 }
109 }
110 else if (name.equals("field")) {
111 String fieldName = DOMUtils.getNodeChildText(children.item(c));
112 this.fields.add(fieldName.toString());
113 }
114 else if (name.equals("sort")) {
115 String sortName = DOMUtils.getNodeChildText(children.item(c));
116 this.sort = sortName;
117 }
118 }
119 }
120 }
121
122 public ClassifierInterface getClassifier()
123 { ClassifierInterface classifier = null;
124
125 if (this.type == null) {
126 return null;
127 }
128 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
129
130 if (this.type.toLowerCase().equals("hierarchy")) {
131 System.out.println(" hierarchy file is " + this.file);
132 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
133 }
134 else if (this.type.toLowerCase().equals("azlist")) {
135 classifier = new AZListClassifier(this.className, this.fields);
136 }
137
138 return classifier;
139 }
140 }
141
142 /**
143 * Create the collection manager for a given collection
144 *
145 * @param site the name of the site
146 * @param collection <code>String</code> the name of the collection
147 */
148 public CollectionManager(String site, String collection) {
149
150 String gsdl3Root = System.getProperty("GSDL3HOME");
151 if (gsdl3Root == null) {
152 System.out.println("Error: Unable to locate GSDL3HOME");
153 System.exit(1);
154 //return;
155 }
156
157 this.siteHome = GSFile.siteHome(gsdl3Root, site);
158 File site_dir = new File(this.siteHome);
159 System.out.println(site_dir);
160 if (!site_dir.exists()) {
161 System.out.println("Error: Non-existant site ("+site+") specified");
162 System.exit(1);
163 }
164 site_dir = null;
165 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
166
167 File collect_dir = new File(this.collectionHome);
168 if (!collect_dir.exists()) {
169 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
170 System.exit(1);
171 }
172 collect_dir = null;
173
174 this.collectionName = collection;
175 this.qualifiedCollectionName = site+"_"+collection;
176
177 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
178 /* if (this.database != null) {
179 this.database.clearCollection(collection);
180 this.database = null;
181 }
182 */
183 if (this.database == null) {
184 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
185 boolean success = this.database.initCollection(this.qualifiedCollectionName);
186 if (!success) {
187 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
188 System.exit(1);
189 }
190 }
191
192 this.metadata = new CollectionMetadata();
193
194 File buildDirectory = new File(getBuildDirectory());
195 if (!buildDirectory.exists()) {
196 buildDirectory.mkdir();
197 }
198 if (!buildDirectory.isDirectory()) {
199 System.err.println("Unable to open directory " + buildDirectory + " for writing");
200 System.exit(1);
201 }
202
203 File archiveDirectory = new File(getArchiveDirectory());
204 if (!archiveDirectory.exists()) {
205 archiveDirectory.mkdir();
206 }
207 if (!archiveDirectory.isDirectory()) {
208 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
209 System.exit(1);
210 }
211
212 this.buildDocNo = 1;
213
214 try {
215 GS3SQLSelect select = new GS3SQLSelect("build");
216 select.addField("*");
217 this.database.execute(select.toString());
218 ResultSet results = this.database.getResultSet();
219 if (results != null &&
220 results.first()) {
221 System.out.println("Reading all keys");
222 do {
223 String key = results.getString("buildKey");
224 String value = results.getString("buildValue");
225
226 if (key.equals("NextSeqNo")) {
227 this.buildDocNo = Integer.parseInt(value);
228 }
229 else if (key.equals("lastBuildDate")) {
230 int year, month, day;
231 year = Integer.parseInt(value.substring(0, 4));
232 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
233 day = Integer.parseInt(value.substring(6, 8));
234
235 this.lastBuildDate = new GregorianCalendar(year, month, day);
236 }
237 } while (results.next());
238 }
239 }
240 catch (SQLException ex)
241 { System.out.println(ex);
242 }
243 }
244
245 public void setBuildManager(BuildManager build_man)
246 { this.buildManager = build_man;
247 }
248
249 private void configureBrowsers(Node node, File etcFile)
250 { CollectionClassifier classifier = null;
251 URL etcURL = null;
252
253 etcURL = URLTools.getFileURL(etcFile);
254
255 NodeList children = node.getChildNodes();
256 for (int c = 0; c < children.getLength(); c ++)
257 { // assume that non-element children are irrelevant
258 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
259 { continue;
260 }
261
262 String name = children.item(c).getNodeName();
263 System.out.println(name);
264
265 if (name.equals(GSXML.CLASSIFIER_ELEM))
266 { NamedNodeMap atts = children.item(c).getAttributes();
267
268 // get the type attribute
269 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
270 if (attribute == null) {
271 continue;
272 }
273 String type = attribute.getNodeValue();
274
275 // get the type attribute
276 attribute = atts.getNamedItem(GSXML.NAME_ATT);
277 if (attribute == null) {
278 continue;
279 }
280 String className = attribute.getNodeValue();
281
282 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
283
284 System.out.println("Found classifier " + type);
285
286 // attach the classifier
287 ClassifierInterface classify = classifier.getClassifier();
288 this.buildManager.getClassifierManager().addClassifier(classify);
289 }
290 }
291 }
292
293 public void configureCollection()
294 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
295
296 // get the File and read it in
297 try
298 {
299 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
300 DocumentBuilder builder = factory.newDocumentBuilder();
301 Document document = builder.parse(collectionConfig);
302
303 // TODO: report an error
304 if (document == null)
305 {
306 }
307
308 // now parse the manager file...
309 Element rootElement = document.getDocumentElement();
310
311 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
312 { // TODO: throw exception
313 }
314
315 System.out.println("Configuring collection");
316
317 NodeList children = rootElement.getChildNodes();
318 for (int c = 0; c < children.getLength(); c ++)
319 { // assume that non-element children are irrelevant
320 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
321 { continue;
322 }
323
324 String name = children.item(c).getNodeName();
325
326 // the name is a plugin element
327
328 if (name.equals(GSXML.RECOGNISE_ELEM)) {
329 System.out.println("recognise elem");
330
331 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
332 for(int i=0; i<doc_types.getLength(); i++) {
333 Element doc_type = (Element)doc_types.item(i);
334 String type = doc_type.getAttribute(GSXML.NAME_ATT);
335 System.err.println("adding recogniser, type "+type);
336 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
337 if (ri != null) {
338 ri.configure(doc_type);
339 }
340 }
341 } else if (name.equals(GSXML.SEARCH_ELEM)) {
342 // pick up attributes from the <search> tag now...
343 NamedNodeMap searchAttributes = children.item(c).getAttributes();
344 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
345 String searchType = null;
346 if (searchAttribute != null) {
347 searchType = searchAttribute.getNodeValue();
348 } else {
349 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
350 searchType = MGIndexer.MG_INDEX_TYPE;
351 }
352
353 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
354 String searchName = null;
355 if (searchAttribute != null) {
356 searchName = searchAttribute.getNodeValue();
357 }
358 if (searchName == null) {
359 searchName = "idx"; // need to modify this if we have two search elements with no names
360 }
361 // create the pertinent indexer...
362 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
363
364 if (indexer == null) {
365 continue;
366 }
367
368 // configure the indexer
369 indexer.configure(children.item(c));
370
371 // install it into the build manager
372 this.buildManager.addIndexer(indexer);
373 }
374 else if (name.equals(GSXML.BROWSE_ELEM))
375 { this.configureBrowsers(children.item(c), collectionConfig);
376 }
377 // TODO: other elements - make a factory-method approach here...
378 else
379 {
380 }
381 }
382 }
383 catch (FactoryConfigurationError e) {
384 System.out.println(e);
385 }
386 catch (ParserConfigurationException ex) {
387 System.out.println(ex);
388 }
389 catch (SAXException ex) {
390 System.out.println(ex);
391 }
392 catch (IOException ex)
393 {
394 System.out.println(ex);
395 }
396
397 System.out.println("<<<Obtaining database>>>>");
398 }
399
400 public String getEtcDirectory()
401 { return GSFile.collectionEtcDir(this.collectionHome);
402 }
403
404 public String getImportDirectory()
405 { return GSFile.collectionImportDir(this.collectionHome);
406 }
407
408 public String getBuildDirectory()
409 { return GSFile.collectionBuildDir(this.collectionHome);
410 }
411
412 public String getArchiveDirectory()
413 { return GSFile.collectionArchiveDir(this.collectionHome);
414 }
415
416 public GS3SQLConnection getDatabase()
417 {
418 return this.database;
419 }
420
421 public Date getBuildDate()
422 { return this.lastBuildDate.getTime();
423 }
424
425 public void startBuild()
426 { GregorianCalendar today = new GregorianCalendar();
427
428 if (this.lastBuildDate != null)
429 { // if the build date is different to the last build date, then reset the build
430 // document number
431 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
432 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
433 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
434 { this.buildDocNo = 1;
435 }
436 else
437 { System.out.println("Continuing build sequence from " + this.buildDocNo);
438 }
439 }
440 this.lastBuildDate = today;
441 }
442
443 public void endBuild()
444 {
445 // here we write out the build config file
446 // create the xml for the buildConfig
447 XMLConverter converter = new XMLConverter();
448 Document doc = converter.newDOM();
449 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
450 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
451 build_config.appendChild(meta_list);
452 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
453 build_config.appendChild(service_list);
454
455 // the document structure and metadata retrieval will use GS3REtrieve service, so add it in here
456 Element base_retrieve_service = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
457 base_retrieve_service.setAttribute(GSXML.NAME_ATT, "GS3Retrieve");
458 service_list.appendChild(base_retrieve_service);
459 // ask the indexers to add stuff into the service rack list
460 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
461 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
462 // get the String
463 String build_config_string = converter.getString(build_config);
464 // write it to the file
465 try {
466 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
467 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
468 writer.write(build_config_string, 0, build_config_string.length());
469 writer.close();
470 } catch (Exception e) {
471 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
472 System.err.println(e.getMessage());
473 }
474
475 // Update build date information
476 GS3SQLDelete remove = new GS3SQLDelete("build");
477 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
478 // rem
479 this.database.execute(remove.toString());
480
481 GS3SQLInsert insert = new GS3SQLInsert("build");
482 insert.addValue("buildKey", "NextSeqNo");
483 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
484 this.database.execute(insert.toString());
485
486 insert = new GS3SQLInsert("build");
487 insert.addValue("buildKey", "lastBuildDate");
488 insert.addValue("buildValue", getDateString(this.lastBuildDate));
489 this.database.execute(insert.toString());
490
491 // Do tail of build output
492 Date startDate = this.lastBuildDate.getTime();
493 Date date = new Date();
494
495 long startTime = startDate.getTime();
496 long endTime = date.getTime();
497
498 long difference = ((endTime - startTime) + 500) / 1000;
499
500 System.out.println("Build completed");
501 System.out.println("---------------");
502 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
503 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
504 }
505
506 private static String getDateString(GregorianCalendar date)
507 { StringBuffer dateString = new StringBuffer();
508
509 int value;
510 dateString.append(date.get(Calendar.YEAR));
511
512 // the use of month is a little odd, hence the following
513 // code. Calendar.MONTH yields 0 = January, 1 = February,
514 // etc. hence there is a '+1' added to the month to make
515 // it into January = 1, etc., and the padding is altered
516 // correspondingly.
517 value = date.get(Calendar.MONTH);
518 if (value < 9)
519 { dateString.append("0");
520 }
521 dateString.append(value + 1);
522 value = date.get(Calendar.DAY_OF_MONTH);
523 if (value < 10)
524 dateString.append("0");
525 dateString.append(value);
526
527 return dateString.toString();
528 }
529
530 public String getNextDocumentID()
531 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
532
533 int value = this.buildDocNo;
534 this.buildDocNo ++;
535
536 ID.append(":");
537 ID.append(Integer.toString(value));
538 return ID.toString();
539 }
540
541 public int getDocumentNumber()
542 { this.buildDocNo ++;
543 return this.buildDocNo - 1;
544 }
545
546 /**
547 * Get the collection metadata item in the given namespace
548 *
549 * @param <code>String</code> the namespace
550 * @param <code>String</code> the label of the metadata
551 */
552 public String getCollectionMetadata(String namespace, String label)
553 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
554 }
555
556 /**
557 * Set the collection metadata item in the given namespace
558 *
559 * @param <code>String</code> the namespace
560 * @param <code>String</code> the label
561 * @param <code>String</code> the value
562 */
563 public void setCollectionMetadata(String namespace, String label, String value)
564 { this.metadata.setCollectionMetadata(namespace, label, value);
565 }
566
567 public String getCollectionName() {
568 return collectionName;
569 }
570}
571
Note: See TracBrowser for help on using the repository browser.