source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8869

Last change on this file since 8869 was 8869, checked in by schweer, 19 years ago

Notifications will only be sent if the collectionConfig.xml of the collection has an entry <notify host=hostId/>, with hostId being the name and port of the host the notifications should be sent to (in most cases, this will be localhost:8080). Note that the alerting service (/research/schweer/gsdl3/packages/gsdl-as) has to be deployed at /alerting for this to work, and SOAP for localsite has to be enabled.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.3 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import java.sql.*;
22
23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
35import org.greenstone.gsdl3.gs3build.database.*;
36import org.greenstone.gsdl3.gs3build.collection.*;
37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
40
41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
42import org.greenstone.gsdl3.gs3build.util.URLTools;
43
44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
46import org.greenstone.gsdl3.util.XMLConverter;
47
48/**
49 * Store and hold collection-level configuration information for a collection.
50 * This should be used by BuildManager to work out which classes, etc. to load
51 * at build time, and as a repository for the collection-level metadata, and
52 * a means of loading and saving the same to a file or database, as is seen
53 * fit in the final development of gs3.
54 */
55
56public class CollectionManager
57{
58 GregorianCalendar lastBuildDate; // date of the last build; stays null until read from the "build" table or set by startBuild()
59 String adminEmail; // the email address of the administrator of the
60 // collection (not assigned anywhere in this class)
61 int buildDocNo; // next sequence number used to generate document identifiers
62 CollectionMetadata metadata; // collection-level metadata
63 GS3SQLConnection database; // the database to store everything in
64 String collectionHome; // collection base directory, from GSFile.collectionBaseDir()
65 String siteHome; // site directory, from GSFile.siteHome()
66 String collectionName; // collection name exactly as passed to the constructor
67 String qualifiedCollectionName; // site + "_" + collection; used as the database name
68 String notifyHost; // host attribute of the notify element in the collection config; null if no such element was read
69
70 BuildManager buildManager; // supplied through setBuildManager(); needed by the configure and endBuild methods
71
72 class CollectionClassifier
73 { URL file;
74 String type;
75 List fields;
76 String sort;
77 String className;
78
79 public CollectionClassifier(URL parentURL, String type, String className, Node node)
80 { this.type = type;
81 this.className = className;
82 this.fields = new ArrayList();
83
84 NodeList children = node.getChildNodes();
85 for (int c = 0; c < children.getLength(); c ++) {
86 Node child = children.item(c);
87
88 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
89 String name = child.getNodeName();
90
91 if (name.equals("file")) {
92 NamedNodeMap atts = children.item(c).getAttributes();
93 Node attribute = atts.getNamedItem("URL");
94 String urlString = attribute.getNodeValue();
95 if (urlString == null)
96 continue;
97
98 System.out.println("Path is" + parentURL.getPath());
99 System.out.println("Host is" + parentURL.getHost());
100 System.out.println(urlString);
101
102 try {
103 URL url = new URL(parentURL, urlString);
104 this.file = url;
105 System.out.println(url);
106 }
107 catch (java.net.MalformedURLException malEx) {
108 System.out.println(malEx);
109 }
110 }
111 else if (name.equals("field")) {
112 String fieldName = DOMUtils.getNodeChildText(children.item(c));
113 this.fields.add(fieldName.toString());
114 }
115 else if (name.equals("sort")) {
116 String sortName = DOMUtils.getNodeChildText(children.item(c));
117 this.sort = sortName;
118 }
119 }
120 }
121 }
122
123 public ClassifierInterface getClassifier()
124 { ClassifierInterface classifier = null;
125
126 if (this.type == null) {
127 return null;
128 }
129 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
130
131 if (this.type.toLowerCase().equals("hierarchy")) {
132 System.out.println(" hierarchy file is " + this.file);
133 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
134 }
135 else if (this.type.toLowerCase().equals("azlist")) {
136 classifier = new AZListClassifier(this.className, this.fields);
137 }
138
139 return classifier;
140 }
141 }
142
143 /**
144 * Create the collection manager for a given collection
145 *
146 * @param site the name of the site
147 * @param collection <code>String</code> the name of the collection
148 */
149 public CollectionManager(String site, String collection) {
150
151 String gsdl3Root = System.getProperty("GSDL3HOME");
152 if (gsdl3Root == null) {
153 System.out.println("Error: Unable to locate GSDL3HOME");
154 System.exit(1);
155 //return;
156 }
157
158 this.siteHome = GSFile.siteHome(gsdl3Root, site);
159 File site_dir = new File(this.siteHome);
160 System.out.println(site_dir);
161 if (!site_dir.exists()) {
162 System.out.println("Error: Non-existant site ("+site+") specified");
163 System.exit(1);
164 }
165 site_dir = null;
166 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
167
168 File collect_dir = new File(this.collectionHome);
169 if (!collect_dir.exists()) {
170 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
171 System.exit(1);
172 }
173 collect_dir = null;
174
175 this.collectionName = collection;
176 this.qualifiedCollectionName = site+"_"+collection;
177
178 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
179 /* if (this.database != null) {
180 this.database.clearCollection(collection);
181 this.database = null;
182 }
183 */
184 if (this.database == null) {
185 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
186 boolean success = this.database.initCollection(this.qualifiedCollectionName);
187 if (!success) {
188 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
189 System.exit(1);
190 }
191 }
192
193 this.metadata = new CollectionMetadata();
194
195 File buildDirectory = new File(getBuildDirectory());
196 if (!buildDirectory.exists()) {
197 buildDirectory.mkdir();
198 }
199 if (!buildDirectory.isDirectory()) {
200 System.err.println("Unable to open directory " + buildDirectory + " for writing");
201 System.exit(1);
202 }
203
204 File archiveDirectory = new File(getArchiveDirectory());
205 if (!archiveDirectory.exists()) {
206 archiveDirectory.mkdir();
207 }
208 if (!archiveDirectory.isDirectory()) {
209 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
210 System.exit(1);
211 }
212
213 this.buildDocNo = 1;
214
215 try {
216 GS3SQLSelect select = new GS3SQLSelect("build");
217 select.addField("*");
218 this.database.execute(select.toString());
219 ResultSet results = this.database.getResultSet();
220 if (results != null &&
221 results.first()) {
222 System.out.println("Reading all keys");
223 do {
224 String key = results.getString("buildKey");
225 String value = results.getString("buildValue");
226
227 if (key.equals("NextSeqNo")) {
228 this.buildDocNo = Integer.parseInt(value);
229 }
230 else if (key.equals("lastBuildDate")) {
231 int year, month, day;
232 year = Integer.parseInt(value.substring(0, 4));
233 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
234 day = Integer.parseInt(value.substring(6, 8));
235
236 this.lastBuildDate = new GregorianCalendar(year, month, day);
237 }
238 } while (results.next());
239 }
240 }
241 catch (SQLException ex)
242 { System.out.println(ex);
243 }
244 }
245
246 public void setBuildManager(BuildManager build_man)
247 { this.buildManager = build_man;
248 }
249
250 private void configureBrowsers(Node node, File etcFile)
251 { CollectionClassifier classifier = null;
252 URL etcURL = null;
253
254 etcURL = URLTools.getFileURL(etcFile);
255
256 NodeList children = node.getChildNodes();
257 for (int c = 0; c < children.getLength(); c ++)
258 { // assume that non-element children are irrelevant
259 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
260 { continue;
261 }
262
263 String name = children.item(c).getNodeName();
264 System.out.println(name);
265
266 if (name.equals(GSXML.CLASSIFIER_ELEM))
267 { NamedNodeMap atts = children.item(c).getAttributes();
268
269 // get the type attribute
270 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
271 if (attribute == null) {
272 continue;
273 }
274 String type = attribute.getNodeValue();
275
276 // get the type attribute
277 attribute = atts.getNamedItem(GSXML.NAME_ATT);
278 if (attribute == null) {
279 continue;
280 }
281 String className = attribute.getNodeValue();
282
283 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
284
285 System.out.println("Found classifier " + type);
286
287 // attach the classifier
288 ClassifierInterface classify = classifier.getClassifier();
289 this.buildManager.getClassifierManager().addClassifier(classify);
290 }
291 }
292 }
293
294 public void configureCollection()
295 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
296
297 // get the File and read it in
298 try
299 {
300 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
301 DocumentBuilder builder = factory.newDocumentBuilder();
302 Document document = builder.parse(collectionConfig);
303
304 // TODO: report an error
305 if (document == null)
306 {
307 }
308
309 // now parse the manager file...
310 Element rootElement = document.getDocumentElement();
311
312 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
313 { // TODO: throw exception
314 }
315
316 System.out.println("Configuring collection");
317
318 NodeList children = rootElement.getChildNodes();
319 for (int c = 0; c < children.getLength(); c ++)
320 { // assume that non-element children are irrelevant
321 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
322 { continue;
323 }
324
325 String name = children.item(c).getNodeName();
326
327 // the name is a plugin element
328
329 if (name.equals(GSXML.RECOGNISE_ELEM)) {
330 System.out.println("recognise elem");
331
332 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
333 for(int i=0; i<doc_types.getLength(); i++) {
334 Element doc_type = (Element)doc_types.item(i);
335 String type = doc_type.getAttribute(GSXML.NAME_ATT);
336 System.err.println("adding recogniser, type "+type);
337 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
338 if (ri != null) {
339 ri.configure(doc_type);
340 }
341 }
342 } else if (name.equals(GSXML.SEARCH_ELEM)) {
343 // pick up attributes from the <search> tag now...
344 NamedNodeMap searchAttributes = children.item(c).getAttributes();
345 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
346 String searchType = null;
347 if (searchAttribute != null) {
348 searchType = searchAttribute.getNodeValue();
349 } else {
350 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
351 searchType = MGIndexer.MG_INDEX_TYPE;
352 }
353
354 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
355 String searchName = null;
356 if (searchAttribute != null) {
357 searchName = searchAttribute.getNodeValue();
358 }
359 if (searchName == null) {
360 searchName = "idx"; // need to modify this if we have two search elements with no names
361 }
362 // create the pertinent indexer...
363 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
364
365 if (indexer == null) {
366 continue;
367 }
368
369 // configure the indexer
370 indexer.configure(children.item(c));
371
372 // install it into the build manager
373 this.buildManager.addIndexer(indexer);
374 }
375 else if (name.equals(GSXML.BROWSE_ELEM))
376 { this.configureBrowsers(children.item(c), collectionConfig);
377 }
378 else if (name.equals(GSXML.NOTIFY_ELEM))
379 {
380 this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT);
381 }
382 // TODO: other elements - make a factory-method approach here...
383 else
384 {
385 }
386 }
387 }
388 catch (FactoryConfigurationError e) {
389 System.out.println(e);
390 }
391 catch (ParserConfigurationException ex) {
392 System.out.println(ex);
393 }
394 catch (SAXException ex) {
395 System.out.println(ex);
396 }
397 catch (IOException ex)
398 {
399 System.out.println(ex);
400 }
401
402 System.out.println("<<<Obtaining database>>>>");
403 }
404
405 public String getEtcDirectory()
406 { return GSFile.collectionEtcDir(this.collectionHome);
407 }
408
409 public String getImportDirectory()
410 { return GSFile.collectionImportDir(this.collectionHome);
411 }
412
413 public String getBuildDirectory()
414 { return GSFile.collectionBuildDir(this.collectionHome);
415 }
416
417 public String getArchiveDirectory()
418 { return GSFile.collectionArchiveDir(this.collectionHome);
419 }
420
421 public GS3SQLConnection getDatabase()
422 {
423 return this.database;
424 }
425
426 public long getBuildTimestamp()
427 { return this.lastBuildDate.getTime().getTime();
428 }
429
430 public Date getBuildDate()
431 { return this.lastBuildDate.getTime();
432 }
433
434 public void startBuild()
435 { GregorianCalendar today = new GregorianCalendar();
436
437 if (this.lastBuildDate != null)
438 { // if the build date is different to the last build date, then reset the build
439 // document number
440 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
441 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
442 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
443 { this.buildDocNo = 1;
444 }
445 else
446 { System.out.println("Continuing build sequence from " + this.buildDocNo);
447 }
448 }
449 this.lastBuildDate = today;
450 }
451
452 public void endBuild()
453 {
454 // here we write out the build config file
455 // create the xml for the buildConfig
456 XMLConverter converter = new XMLConverter();
457 Document doc = converter.newDOM();
458 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
459 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
460 build_config.appendChild(meta_list);
461 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
462 build_config.appendChild(service_list);
463
464 // the document structure and metadata retrieval will use GS3REtrieve service, so add it in here
465 Element base_retrieve_service = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
466 base_retrieve_service.setAttribute(GSXML.NAME_ATT, "GS3Retrieve");
467 service_list.appendChild(base_retrieve_service);
468 // ask the indexers to add stuff into the service rack list
469 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
470 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
471 // get the String
472 String build_config_string = converter.getString(build_config);
473 // write it to the file
474 try {
475 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
476 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
477 writer.write(build_config_string, 0, build_config_string.length());
478 writer.close();
479 } catch (Exception e) {
480 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
481 System.err.println(e.getMessage());
482 }
483
484 // Update build date information
485 GS3SQLDelete remove = new GS3SQLDelete("build");
486 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
487 // rem
488 this.database.execute(remove.toString());
489
490 GS3SQLInsert insert = new GS3SQLInsert("build");
491 insert.addValue("buildKey", "NextSeqNo");
492 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
493 this.database.execute(insert.toString());
494
495 insert = new GS3SQLInsert("build");
496 insert.addValue("buildKey", "lastBuildDate");
497 insert.addValue("buildValue", getDateString(this.lastBuildDate));
498 this.database.execute(insert.toString());
499
500 // Do tail of build output
501 Date startDate = this.lastBuildDate.getTime();
502 Date date = new Date();
503
504 long startTime = startDate.getTime();
505 long endTime = date.getTime();
506
507 long difference = ((endTime - startTime) + 500) / 1000;
508
509 System.out.println("Build completed");
510 System.out.println("---------------");
511 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
512 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
513 }
514
515 private static String getDateString(GregorianCalendar date)
516 { StringBuffer dateString = new StringBuffer();
517
518 int value;
519 dateString.append(date.get(Calendar.YEAR));
520
521 // the use of month is a little odd, hence the following
522 // code. Calendar.MONTH yields 0 = January, 1 = February,
523 // etc. hence there is a '+1' added to the month to make
524 // it into January = 1, etc., and the padding is altered
525 // correspondingly.
526 value = date.get(Calendar.MONTH);
527 if (value < 9)
528 { dateString.append("0");
529 }
530 dateString.append(value + 1);
531 value = date.get(Calendar.DAY_OF_MONTH);
532 if (value < 10)
533 dateString.append("0");
534 dateString.append(value);
535
536 return dateString.toString();
537 }
538
539
540 public String getNextDocumentID()
541 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
542
543 int value = this.buildDocNo;
544 this.buildDocNo ++;
545
546 ID.append(":");
547 ID.append(Integer.toString(value));
548 return ID.toString();
549 }
550
551 public int getDocumentNumber()
552 { this.buildDocNo ++;
553 return this.buildDocNo - 1;
554 }
555
556 /**
557 * Get the collection metadata item in the given namespace
558 *
559 * @param <code>String</code> the namespace
560 * @param <code>String</code> the label of the metadata
561 */
562 public String getCollectionMetadata(String namespace, String label)
563 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
564 }
565
566 /**
567 * Set the collection metadata item in the given namespace
568 *
569 * @param <code>String</code> the namespace
570 * @param <code>String</code> the label
571 * @param <code>String</code> the value
572 */
573 public void setCollectionMetadata(String namespace, String label, String value)
574 { this.metadata.setCollectionMetadata(namespace, label, value);
575 }
576
577 public String getCollectionName() {
578 return collectionName;
579 }
580
581/**
582 * @return
583 */
584public String getNotifyHost() {
585 return notifyHost;
586}
587}
588
Note: See TracBrowser for help on using the repository browser.