source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:keywords set to Author Date Id Revision
File size: 18.3 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import java.sql.*;
22
23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
35import org.greenstone.gsdl3.gs3build.database.*;
36import org.greenstone.gsdl3.gs3build.collection.*;
37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
40
41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
42import org.greenstone.gsdl3.gs3build.util.URLTools;
43
44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
46import org.greenstone.gsdl3.util.XMLConverter;
47
48/**
49 * Store and hold collection-level configuration information for a collection.
50 * This should be used by BuildManager to work out which classes, etc. to load
51 * at build time, and as a repository for the collection-level metadata, and
52 * a means of loading and saving the same to a file or database, as is seen
53 * fit in the final development of gs3.
54 */
55
56public class CollectionManager
57{
// Date of the most recent build. The constructor loads it from the
// "lastBuildDate" row of the "build" table when one exists; startBuild()
// replaces it with the current date and time.
GregorianCalendar lastBuildDate;
// The email address of the administrator of the collection.
// NOTE(review): never assigned anywhere in this class.
String adminEmail;
// Sequence number used to generate document identifiers; starts at 1 unless a
// "NextSeqNo" row is found in the "build" table.
int buildDocNo;
// Collection-level metadata.
CollectionMetadata metadata;
// The database to store everything in.
GS3SQLConnection database;
// Value returned by GSFile.collectionBaseDir(siteHome, collection).
String collectionHome;
// Value returned by GSFile.siteHome(GSDL3HOME, site).
String siteHome;
// The collection name exactly as passed to the constructor.
String collectionName;
// site + "_" + collection; used as the database name.
String qualifiedCollectionName;
// Host attribute of the notify element in the collection configuration;
// stays null until configureCollection() meets such an element.
String notifyHost;

// Build manager that receives the recognisers, indexers and classifiers found
// in the collection configuration; set through setBuildManager().
BuildManager buildManager;
71
72 class CollectionClassifier
73 { URL file;
74 String type;
75 List fields;
76 String sort;
77 String className;
78
79 public CollectionClassifier(URL parentURL, String type, String className, Node node)
80 { this.type = type;
81 this.className = className;
82 this.fields = new ArrayList();
83
84 NodeList children = node.getChildNodes();
85 for (int c = 0; c < children.getLength(); c ++) {
86 Node child = children.item(c);
87
88 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
89 String name = child.getNodeName();
90
91 if (name.equals("file")) {
92 NamedNodeMap atts = children.item(c).getAttributes();
93 Node attribute = atts.getNamedItem("URL");
94 String urlString = attribute.getNodeValue();
95 if (urlString == null)
96 continue;
97
98 System.out.println("Path is" + parentURL.getPath());
99 System.out.println("Host is" + parentURL.getHost());
100 System.out.println(urlString);
101
102 try {
103 URL url = new URL(parentURL, urlString);
104 this.file = url;
105 System.out.println(url);
106 }
107 catch (java.net.MalformedURLException malEx) {
108 System.out.println(malEx);
109 }
110 }
111 else if (name.equals("field")) {
112 String fieldName = DOMUtils.getNodeChildText(children.item(c));
113 this.fields.add(fieldName.toString());
114 }
115 else if (name.equals("sort")) {
116 String sortName = DOMUtils.getNodeChildText(children.item(c));
117 this.sort = sortName;
118 }
119 }
120 }
121 }
122
123 public ClassifierInterface getClassifier()
124 { ClassifierInterface classifier = null;
125
126 if (this.type == null) {
127 return null;
128 }
129 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
130
131 if (this.type.toLowerCase().equals("hierarchy")) {
132 System.out.println(" hierarchy file is " + this.file);
133 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
134 }
135 else if (this.type.toLowerCase().equals("azlist")) {
136 classifier = new AZListClassifier(this.className, this.fields);
137 }
138
139 return classifier;
140 }
141 }
142
143 /**
144 * Create the collection manager for a given collection
145 *
146 * @param site the name of the site
147 * @param collection <code>String</code> the name of the collection
148 */
149 public CollectionManager(String site, String collection) {
150
151 String gsdl3Root = System.getProperty("GSDL3HOME");
152 if (gsdl3Root == null) {
153 System.out.println("Error: Unable to locate GSDL3HOME");
154 System.exit(1);
155 //return;
156 }
157
158 this.siteHome = GSFile.siteHome(gsdl3Root, site);
159 File site_dir = new File(this.siteHome);
160 System.out.println(site_dir);
161 if (!site_dir.exists()) {
162 System.out.println("Error: Non-existant site ("+site+") specified");
163 System.exit(1);
164 }
165 site_dir = null;
166 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
167
168 File collect_dir = new File(this.collectionHome);
169 if (!collect_dir.exists()) {
170 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
171 System.exit(1);
172 }
173 collect_dir = null;
174
175 this.collectionName = collection;
176 this.qualifiedCollectionName = site+"_"+collection;
177
178 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
179 /* if (this.database != null) {
180 this.database.clearCollection(collection);
181 this.database = null;
182 }
183 */
184 if (this.database == null) {
185 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
186 boolean success = this.database.initCollection(this.qualifiedCollectionName);
187 if (!success) {
188 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
189 System.exit(1);
190 }
191 }
192
193 this.metadata = new CollectionMetadata();
194
195 File buildDirectory = new File(getBuildDirectory());
196 if (!buildDirectory.exists()) {
197 buildDirectory.mkdir();
198 }
199 if (!buildDirectory.isDirectory()) {
200 System.err.println("Unable to open directory " + buildDirectory + " for writing");
201 System.exit(1);
202 }
203
204 File archiveDirectory = new File(getArchiveDirectory());
205 if (!archiveDirectory.exists()) {
206 archiveDirectory.mkdir();
207 }
208 if (!archiveDirectory.isDirectory()) {
209 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
210 System.exit(1);
211 }
212
213 this.buildDocNo = 1;
214
215 try {
216 GS3SQLSelect select = new GS3SQLSelect("build");
217 select.addField("*");
218 Statement statement = this.database.createStatement();
219 ResultSet results = statement.executeQuery(select.toString());
220 if (results.first()) {
221 System.out.println("Reading all keys");
222 do {
223 String key = results.getString("buildKey");
224 String value = results.getString("buildValue");
225
226 if (key.equals("NextSeqNo")) {
227 this.buildDocNo = Integer.parseInt(value);
228 }
229 else if (key.equals("lastBuildDate")) {
230 int year, month, day;
231 year = Integer.parseInt(value.substring(0, 4));
232 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
233 day = Integer.parseInt(value.substring(6, 8));
234
235 this.lastBuildDate = new GregorianCalendar(year, month, day);
236 }
237 } while (results.next());
238 }
239 statement.close();
240 }
241 catch (SQLException ex)
242 { System.out.println(ex);
243 }
244 }
245
246 public void setBuildManager(BuildManager build_man)
247 { this.buildManager = build_man;
248 }
249
250 private void configureBrowsers(Node node, File etcFile)
251 { CollectionClassifier classifier = null;
252 URL etcURL = null;
253
254 etcURL = URLTools.getFileURL(etcFile);
255
256 NodeList children = node.getChildNodes();
257 for (int c = 0; c < children.getLength(); c ++)
258 { // assume that non-element children are irrelevant
259 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
260 { continue;
261 }
262
263 String name = children.item(c).getNodeName();
264 System.out.println(name);
265
266 if (name.equals(GSXML.CLASSIFIER_ELEM))
267 { NamedNodeMap atts = children.item(c).getAttributes();
268
269 // get the type attribute
270 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
271 if (attribute == null) {
272 continue;
273 }
274 String type = attribute.getNodeValue();
275
276 // get the type attribute
277 attribute = atts.getNamedItem(GSXML.NAME_ATT);
278 if (attribute == null) {
279 continue;
280 }
281 String className = attribute.getNodeValue();
282
283 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
284
285 System.out.println("Found classifier " + type);
286
287 // attach the classifier
288 ClassifierInterface classify = classifier.getClassifier();
289 this.buildManager.getClassifierManager().addClassifier(classify);
290 }
291 }
292 }
293
294 public void configureCollection()
295 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
296
297 // get the File and read it in
298 try
299 {
300 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
301 DocumentBuilder builder = factory.newDocumentBuilder();
302 Document document = builder.parse(collectionConfig);
303
304 // TODO: report an error
305 if (document == null)
306 {
307 }
308
309 // now parse the manager file...
310 Element rootElement = document.getDocumentElement();
311
312 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
313 { // TODO: throw exception
314 }
315
316 System.out.println("Configuring collection");
317
318 NodeList children = rootElement.getChildNodes();
319 for (int c = 0; c < children.getLength(); c ++)
320 { // assume that non-element children are irrelevant
321 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
322 { continue;
323 }
324
325 String name = children.item(c).getNodeName();
326
327 // the name is a plugin element
328
329 if (name.equals(GSXML.RECOGNISE_ELEM)) {
330 System.out.println("recognise elem");
331
332 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
333 for(int i=0; i<doc_types.getLength(); i++) {
334 Element doc_type = (Element)doc_types.item(i);
335 String type = doc_type.getAttribute(GSXML.NAME_ATT);
336 System.err.println("adding recogniser, type "+type);
337 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
338 if (ri != null) {
339 ri.configure(doc_type);
340 }
341 }
342 } else if (name.equals(GSXML.SEARCH_ELEM)) {
343 // pick up attributes from the <search> tag now...
344 NamedNodeMap searchAttributes = children.item(c).getAttributes();
345 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
346 String searchType = null;
347 if (searchAttribute != null) {
348 searchType = searchAttribute.getNodeValue();
349 } else {
350 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
351 searchType = MGIndexer.MG_INDEX_TYPE;
352 }
353
354 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
355 String searchName = null;
356 if (searchAttribute != null) {
357 searchName = searchAttribute.getNodeValue();
358 }
359 if (searchName == null) {
360 searchName = "idx"; // need to modify this if we have two search elements with no names
361 }
362 // create the pertinent indexer...
363 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
364
365 if (indexer == null) {
366 continue;
367 }
368
369 // configure the indexer
370 indexer.configure(children.item(c));
371
372 // install it into the build manager
373 this.buildManager.addIndexer(indexer);
374 }
375 else if (name.equals(GSXML.BROWSE_ELEM))
376 { this.configureBrowsers(children.item(c), collectionConfig);
377 }
378 else if (name.equals(GSXML.NOTIFY_ELEM))
379 {
380 this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT);
381 }
382 // TODO: other elements - make a factory-method approach here...
383 else
384 {
385 }
386 }
387 }
388 catch (FactoryConfigurationError e) {
389 System.out.println(e);
390 }
391 catch (ParserConfigurationException ex) {
392 System.out.println(ex);
393 }
394 catch (SAXException ex) {
395 System.out.println(ex);
396 }
397 catch (IOException ex)
398 {
399 System.out.println(ex);
400 }
401
402 System.out.println("<<<Obtaining database>>>>");
403 }
404
405 public String getEtcDirectory()
406 { return GSFile.collectionEtcDir(this.collectionHome);
407 }
408
409 public String getImportDirectory()
410 { return GSFile.collectionImportDir(this.collectionHome);
411 }
412
413 public String getBuildDirectory()
414 { return GSFile.collectionBuildDir(this.collectionHome);
415 }
416
417 public String getArchiveDirectory()
418 { return GSFile.collectionArchiveDir(this.collectionHome);
419 }
420
421 public GS3SQLConnection getDatabase()
422 {
423 return this.database;
424 }
425
426 public long getBuildTimestamp()
427 { return this.lastBuildDate.getTime().getTime();
428 }
429
430 public Date getBuildDate()
431 { return this.lastBuildDate.getTime();
432 }
433
434 public void startBuild()
435 { GregorianCalendar today = new GregorianCalendar();
436
437 if (this.lastBuildDate != null)
438 { // if the build date is different to the last build date, then reset the build
439 // document number
440 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
441 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
442 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
443 { this.buildDocNo = 1;
444 }
445 else
446 { System.out.println("Continuing build sequence from " + this.buildDocNo);
447 }
448 }
449 this.lastBuildDate = today;
450 }
451
452 public void endBuild()
453 {
454 // here we write out the build config file
455 // create the xml for the buildConfig
456 XMLConverter converter = new XMLConverter();
457 Document doc = converter.newDOM();
458 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
459 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
460 build_config.appendChild(meta_list);
461 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
462 build_config.appendChild(service_list);
463
464 // ask the indexers and classifiers to add stuff into the service rack list
465 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
466 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
467 // get the String
468 String build_config_string = converter.getString(build_config);
469 // write it to the file
470 try {
471 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
472 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
473 writer.write(build_config_string, 0, build_config_string.length());
474 writer.close();
475 } catch (Exception e) {
476 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
477 System.err.println(e.getMessage());
478 }
479
480 // Update build date information
481 try {
482 GS3SQLDelete remove = new GS3SQLDelete("build");
483 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
484 // rem
485 Statement statement = this.database.createStatement();
486 statement.execute(remove.toString());
487
488 GS3SQLInsert insert = new GS3SQLInsert("build");
489 insert.addValue("buildKey", "NextSeqNo");
490 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
491 statement.execute(insert.toString());
492
493 insert = new GS3SQLInsert("build");
494 insert.addValue("buildKey", "lastBuildDate");
495 insert.addValue("buildValue", getDateString(this.lastBuildDate));
496 statement.execute(insert.toString());
497 statement.close();
498 } catch (SQLException e) {
499 System.err.println("CollectionManager.endBuild(): Can't update build information: "+e);
500 }
501
502 // Do tail of build output
503 Date startDate = this.lastBuildDate.getTime();
504 Date date = new Date();
505
506 long startTime = startDate.getTime();
507 long endTime = date.getTime();
508
509 long difference = ((endTime - startTime) + 500) / 1000;
510
511 System.out.println("Build completed");
512 System.out.println("---------------");
513 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
514 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
515 }
516
517 private static String getDateString(GregorianCalendar date)
518 { StringBuffer dateString = new StringBuffer();
519
520 int value;
521 dateString.append(date.get(Calendar.YEAR));
522
523 // the use of month is a little odd, hence the following
524 // code. Calendar.MONTH yields 0 = January, 1 = February,
525 // etc. hence there is a '+1' added to the month to make
526 // it into January = 1, etc., and the padding is altered
527 // correspondingly.
528 value = date.get(Calendar.MONTH);
529 if (value < 9)
530 { dateString.append("0");
531 }
532 dateString.append(value + 1);
533 value = date.get(Calendar.DAY_OF_MONTH);
534 if (value < 10)
535 dateString.append("0");
536 dateString.append(value);
537
538 return dateString.toString();
539 }
540
541
542 public String getNextDocumentID()
543 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
544
545 int value = this.buildDocNo;
546 this.buildDocNo ++;
547
548 ID.append(":");
549 ID.append(Integer.toString(value));
550 return ID.toString();
551 }
552
553 public int getDocumentNumber()
554 { this.buildDocNo ++;
555 return this.buildDocNo - 1;
556 }
557
558 /**
559 * Get the collection metadata item in the given namespace
560 *
561 * @param <code>String</code> the namespace
562 * @param <code>String</code> the label of the metadata
563 */
564 public String getCollectionMetadata(String namespace, String label)
565 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
566 }
567
568 /**
569 * Set the collection metadata item in the given namespace
570 *
571 * @param <code>String</code> the namespace
572 * @param <code>String</code> the label
573 * @param <code>String</code> the value
574 */
575 public void setCollectionMetadata(String namespace, String label, String value)
576 { this.metadata.setCollectionMetadata(namespace, label, value);
577 }
578
579 public String getCollectionName() {
580 return collectionName;
581 }
582
583/**
584 * @return
585 */
586public String getNotifyHost() {
587 return notifyHost;
588}
589}
590
Note: See TracBrowser for help on using the repository browser.