source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8969

Last change on this file since 8969 was 8969, checked in by kjdon, 19 years ago

don't add in GS3Retrieve to service rack list anymore

  • Property svn:keywords set to Author Date Id Revision
File size: 18.0 KB
RevLine 
[5798]1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
[6345]5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
[5798]10import java.util.GregorianCalendar;
[5943]11
[5798]12import java.io.File;
13import java.io.IOException;
[6896]14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
[5798]17
[6345]18import java.net.URL;
[7478]19import java.net.URLEncoder;
[6345]20
[8422]21import java.sql.*;
22
[5798]23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
[8422]35import org.greenstone.gsdl3.gs3build.database.*;
[5798]36import org.greenstone.gsdl3.gs3build.collection.*;
[6345]37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
[8543]39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
[5798]40
[6345]41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
[7478]42import org.greenstone.gsdl3.gs3build.util.URLTools;
[5798]43
[6870]44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
[6896]46import org.greenstone.gsdl3.util.XMLConverter;
[6870]47
[5798]48/**
49 * Store and hold collection-level configuration information for a collection.
50 * This should be used by BuildManager to work out which classes, etc. to load
51 * at build time, and as a repository for the collection-level metadata, and
52 * a means of loading and saving the same to a file or database, as is seen
53 * fit in the final development of gs3.
54 */
55
56public class CollectionManager
57{
[6345]58 GregorianCalendar lastBuildDate; // pretty obvious
59 String adminEmail; // the email address of the administrator of the
60 // collection
61 int buildDocNo; // used to generate document identifiers
62 CollectionMetadata metadata; // collection-level metadata
63 GS3SQLConnection database; // the database to store everything in
[5798]64 String collectionHome;
[7468]65 String siteHome;
[6345]66 String collectionName;
[7468]67 String qualifiedCollectionName; // used as the database name
[8869]68 String notifyHost;
[5798]69
[7468]70 BuildManager buildManager;
[6870]71
[6345]72 class CollectionClassifier
[7186]73 { URL file;
[6345]74 String type;
75 List fields;
76 String sort;
[7269]77 String className;
[6345]78
[7269]79 public CollectionClassifier(URL parentURL, String type, String className, Node node)
[6345]80 { this.type = type;
[7269]81 this.className = className;
[6345]82 this.fields = new ArrayList();
83
84 NodeList children = node.getChildNodes();
85 for (int c = 0; c < children.getLength(); c ++) {
86 Node child = children.item(c);
87
88 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
89 String name = child.getNodeName();
90
91 if (name.equals("file")) {
92 NamedNodeMap atts = children.item(c).getAttributes();
93 Node attribute = atts.getNamedItem("URL");
94 String urlString = attribute.getNodeValue();
95 if (urlString == null)
96 continue;
97
[7478]98 System.out.println("Path is" + parentURL.getPath());
99 System.out.println("Host is" + parentURL.getHost());
100 System.out.println(urlString);
101
[7186]102 try {
103 URL url = new URL(parentURL, urlString);
104 this.file = url;
[7478]105 System.out.println(url);
[7186]106 }
107 catch (java.net.MalformedURLException malEx) {
108 System.out.println(malEx);
109 }
[6345]110 }
111 else if (name.equals("field")) {
112 String fieldName = DOMUtils.getNodeChildText(children.item(c));
113 this.fields.add(fieldName.toString());
114 }
115 else if (name.equals("sort")) {
116 String sortName = DOMUtils.getNodeChildText(children.item(c));
117 this.sort = sortName;
118 }
119 }
120 }
121 }
122
123 public ClassifierInterface getClassifier()
[7468]124 { ClassifierInterface classifier = null;
[7269]125
[6696]126 if (this.type == null) {
127 return null;
128 }
[8422]129 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
[7468]130
[6345]131 if (this.type.toLowerCase().equals("hierarchy")) {
[8422]132 System.out.println(" hierarchy file is " + this.file);
[7468]133 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
[6345]134 }
[6696]135 else if (this.type.toLowerCase().equals("azlist")) {
[7468]136 classifier = new AZListClassifier(this.className, this.fields);
[6696]137 }
[6345]138
[7269]139 return classifier;
[6345]140 }
141 }
142
[5798]143 /**
144 * Create the collection manager for a given collection
145 *
[6870]146 * @param site the name of the site
147 * @param collection <code>String</code> the name of the collection
[5798]148 */
[7468]149 public CollectionManager(String site, String collection) {
150
151 String gsdl3Root = System.getProperty("GSDL3HOME");
152 if (gsdl3Root == null) {
153 System.out.println("Error: Unable to locate GSDL3HOME");
154 System.exit(1);
155 //return;
156 }
157
[6870]158 this.siteHome = GSFile.siteHome(gsdl3Root, site);
159 File site_dir = new File(this.siteHome);
[8408]160 System.out.println(site_dir);
[6870]161 if (!site_dir.exists()) {
[7468]162 System.out.println("Error: Non-existant site ("+site+") specified");
163 System.exit(1);
[6870]164 }
165 site_dir = null;
166 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
167
168 File collect_dir = new File(this.collectionHome);
169 if (!collect_dir.exists()) {
[7468]170 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
171 System.exit(1);
[6870]172 }
173 collect_dir = null;
174
175 this.collectionName = collection;
176 this.qualifiedCollectionName = site+"_"+collection;
177
[8745]178 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
[6696]179 /* if (this.database != null) {
[6345]180 this.database.clearCollection(collection);
181 this.database = null;
182 }
[6696]183 */
[5943]184 if (this.database == null) {
[8745]185 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
[8774]186 boolean success = this.database.initCollection(this.qualifiedCollectionName);
187 if (!success) {
188 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
189 System.exit(1);
190 }
[5943]191 }
[5798]192
[5943]193 this.metadata = new CollectionMetadata();
194
[8491]195 File buildDirectory = new File(getBuildDirectory());
[6009]196 if (!buildDirectory.exists()) {
197 buildDirectory.mkdir();
198 }
[8491]199 if (!buildDirectory.isDirectory()) {
200 System.err.println("Unable to open directory " + buildDirectory + " for writing");
201 System.exit(1);
202 }
[6009]203
[8491]204 File archiveDirectory = new File(getArchiveDirectory());
[6009]205 if (!archiveDirectory.exists()) {
206 archiveDirectory.mkdir();
207 }
[8491]208 if (!archiveDirectory.isDirectory()) {
209 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
210 System.exit(1);
211 }
[6009]212
[5798]213 this.buildDocNo = 1;
[8422]214
215 try {
216 GS3SQLSelect select = new GS3SQLSelect("build");
217 select.addField("*");
218 this.database.execute(select.toString());
219 ResultSet results = this.database.getResultSet();
220 if (results != null &&
221 results.first()) {
222 System.out.println("Reading all keys");
223 do {
224 String key = results.getString("buildKey");
225 String value = results.getString("buildValue");
226
227 if (key.equals("NextSeqNo")) {
228 this.buildDocNo = Integer.parseInt(value);
229 }
230 else if (key.equals("lastBuildDate")) {
231 int year, month, day;
232 year = Integer.parseInt(value.substring(0, 4));
233 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
234 day = Integer.parseInt(value.substring(6, 8));
235
236 this.lastBuildDate = new GregorianCalendar(year, month, day);
237 }
238 } while (results.next());
239 }
240 }
241 catch (SQLException ex)
242 { System.out.println(ex);
243 }
[6345]244 }
245
[7186]246 public void setBuildManager(BuildManager build_man)
247 { this.buildManager = build_man;
248 }
249
250 private void configureBrowsers(Node node, File etcFile)
[6696]251 { CollectionClassifier classifier = null;
[7186]252 URL etcURL = null;
[6696]253
[7478]254 etcURL = URLTools.getFileURL(etcFile);
[7186]255
[6696]256 NodeList children = node.getChildNodes();
257 for (int c = 0; c < children.getLength(); c ++)
258 { // assume that non-element children are irrelevant
259 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
260 { continue;
261 }
262
263 String name = children.item(c).getNodeName();
264 System.out.println(name);
265
[6870]266 if (name.equals(GSXML.CLASSIFIER_ELEM))
[7269]267 { NamedNodeMap atts = children.item(c).getAttributes();
268
269 // get the type attribute
[6870]270 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
[6696]271 if (attribute == null) {
272 continue;
273 }
[7269]274 String type = attribute.getNodeValue();
[6696]275
[7269]276 // get the type attribute
277 attribute = atts.getNamedItem(GSXML.NAME_ATT);
278 if (attribute == null) {
279 continue;
280 }
281 String className = attribute.getNodeValue();
282
283 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
[6696]284
285 System.out.println("Found classifier " + type);
286
287 // attach the classifier
288 ClassifierInterface classify = classifier.getClassifier();
[6896]289 this.buildManager.getClassifierManager().addClassifier(classify);
[6696]290 }
291 }
292 }
293
[6896]294 public void configureCollection()
[6870]295 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
[8422]296
[5798]297 // get the File and read it in
298 try
299 {
300 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
301 DocumentBuilder builder = factory.newDocumentBuilder();
302 Document document = builder.parse(collectionConfig);
303
304 // TODO: report an error
305 if (document == null)
306 {
307 }
308
309 // now parse the manager file...
310 Element rootElement = document.getDocumentElement();
311
[6870]312 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
[5798]313 { // TODO: throw exception
314 }
315
[6696]316 System.out.println("Configuring collection");
317
[5798]318 NodeList children = rootElement.getChildNodes();
319 for (int c = 0; c < children.getLength(); c ++)
320 { // assume that non-element children are irrelevant
321 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
322 { continue;
323 }
324
325 String name = children.item(c).getNodeName();
326
327 // the name is a plugin element
[7186]328
[8491]329 if (name.equals(GSXML.RECOGNISE_ELEM)) {
330 System.out.println("recognise elem");
331
332 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
333 for(int i=0; i<doc_types.getLength(); i++) {
334 Element doc_type = (Element)doc_types.item(i);
335 String type = doc_type.getAttribute(GSXML.NAME_ATT);
336 System.err.println("adding recogniser, type "+type);
[8543]337 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
338 if (ri != null) {
339 ri.configure(doc_type);
340 }
[7186]341 }
[8491]342 } else if (name.equals(GSXML.SEARCH_ELEM)) {
[6735]343 // pick up attributes from the <search> tag now...
344 NamedNodeMap searchAttributes = children.item(c).getAttributes();
[6870]345 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
346 String searchType = null;
347 if (searchAttribute != null) {
348 searchType = searchAttribute.getNodeValue();
349 } else {
350 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
351 searchType = MGIndexer.MG_INDEX_TYPE;
352 }
353
354 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
[6735]355 String searchName = null;
356 if (searchAttribute != null) {
[6870]357 searchName = searchAttribute.getNodeValue();
[6735]358 }
[7311]359 if (searchName == null) {
360 searchName = "idx"; // need to modify this if we have two search elements with no names
361 }
[6735]362 // create the pertinent indexer...
363 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
364
365 if (indexer == null) {
366 continue;
[6696]367 }
[6735]368
369 // configure the indexer
370 indexer.configure(children.item(c));
371
372 // install it into the build manager
[6896]373 this.buildManager.addIndexer(indexer);
[5798]374 }
[6870]375 else if (name.equals(GSXML.BROWSE_ELEM))
[7186]376 { this.configureBrowsers(children.item(c), collectionConfig);
[5798]377 }
[8869]378 else if (name.equals(GSXML.NOTIFY_ELEM))
379 {
380 this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT);
381 }
[5798]382 // TODO: other elements - make a factory-method approach here...
383 else
384 {
385 }
386 }
387 }
388 catch (FactoryConfigurationError e) {
389 System.out.println(e);
390 }
391 catch (ParserConfigurationException ex) {
392 System.out.println(ex);
393 }
394 catch (SAXException ex) {
395 System.out.println(ex);
396 }
397 catch (IOException ex)
398 {
399 System.out.println(ex);
400 }
401
402 System.out.println("<<<Obtaining database>>>>");
403 }
404
[6099]405 public String getEtcDirectory()
[6870]406 { return GSFile.collectionEtcDir(this.collectionHome);
[6099]407 }
408
[5943]409 public String getImportDirectory()
[6870]410 { return GSFile.collectionImportDir(this.collectionHome);
[5943]411 }
[5798]412
[5943]413 public String getBuildDirectory()
[6870]414 { return GSFile.collectionBuildDir(this.collectionHome);
[5943]415 }
[5798]416
[6009]417 public String getArchiveDirectory()
[6870]418 { return GSFile.collectionArchiveDir(this.collectionHome);
[6009]419 }
420
[5943]421 public GS3SQLConnection getDatabase()
422 {
423 return this.database;
424 }
425
[8859]426 public long getBuildTimestamp()
427 { return this.lastBuildDate.getTime().getTime();
428 }
429
[8408]430 public Date getBuildDate()
431 { return this.lastBuildDate.getTime();
432 }
433
[5943]434 public void startBuild()
435 { GregorianCalendar today = new GregorianCalendar();
436
437 if (this.lastBuildDate != null)
438 { // if the build date is different to the last build date, then reset the build
439 // document number
440 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
441 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
442 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
443 { this.buildDocNo = 1;
444 }
[8422]445 else
446 { System.out.println("Continuing build sequence from " + this.buildDocNo);
447 }
[5798]448 }
[5943]449 this.lastBuildDate = today;
450 }
[5798]451
[5943]452 public void endBuild()
453 {
[8422]454 // here we write out the build config file
455 // create the xml for the buildConfig
456 XMLConverter converter = new XMLConverter();
457 Document doc = converter.newDOM();
458 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
459 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
460 build_config.appendChild(meta_list);
461 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
462 build_config.appendChild(service_list);
463
[8969]464 // ask the indexers and classifiers to add stuff into the service rack list
[8422]465 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
466 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
467 // get the String
468 String build_config_string = converter.getString(build_config);
469 // write it to the file
470 try {
471 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
472 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
473 writer.write(build_config_string, 0, build_config_string.length());
474 writer.close();
475 } catch (Exception e) {
476 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
477 System.err.println(e.getMessage());
478 }
479
480 // Update build date information
481 GS3SQLDelete remove = new GS3SQLDelete("build");
482 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
483 // rem
484 this.database.execute(remove.toString());
485
486 GS3SQLInsert insert = new GS3SQLInsert("build");
487 insert.addValue("buildKey", "NextSeqNo");
488 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
489 this.database.execute(insert.toString());
490
491 insert = new GS3SQLInsert("build");
492 insert.addValue("buildKey", "lastBuildDate");
493 insert.addValue("buildValue", getDateString(this.lastBuildDate));
494 this.database.execute(insert.toString());
[6896]495
[8422]496 // Do tail of build output
[5943]497 Date startDate = this.lastBuildDate.getTime();
498 Date date = new Date();
[5798]499
[5943]500 long startTime = startDate.getTime();
501 long endTime = date.getTime();
[5798]502
[5943]503 long difference = ((endTime - startTime) + 500) / 1000;
[5798]504
[5943]505 System.out.println("Build completed");
506 System.out.println("---------------");
507 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
508 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
509 }
[5798]510
[8422]511 private static String getDateString(GregorianCalendar date)
512 { StringBuffer dateString = new StringBuffer();
513
[5943]514 int value;
[8422]515 dateString.append(date.get(Calendar.YEAR));
[5798]516
[5943]517 // the use of month is a little odd, hence the following
518 // code. Calendar.MONTH yields 0 = January, 1 = February,
519 // etc. hence there is a '+1' added to the month to make
520 // it into January = 1, etc., and the padding is altered
521 // correspondingly.
[8422]522 value = date.get(Calendar.MONTH);
[5943]523 if (value < 9)
[8422]524 { dateString.append("0");
[5943]525 }
[8422]526 dateString.append(value + 1);
527 value = date.get(Calendar.DAY_OF_MONTH);
[5943]528 if (value < 10)
[8422]529 dateString.append("0");
530 dateString.append(value);
[5798]531
[8422]532 return dateString.toString();
533 }
[5798]534
[8859]535
[8422]536 public String getNextDocumentID()
537 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
538
539 int value = this.buildDocNo;
[5943]540 this.buildDocNo ++;
541
[6009]542 ID.append(":");
[5943]543 ID.append(Integer.toString(value));
544 return ID.toString();
545 }
546
547 public int getDocumentNumber()
548 { this.buildDocNo ++;
549 return this.buildDocNo - 1;
550 }
[5798]551
[5943]552 /**
553 * Get the collection metadata item in the given namespace
554 *
555 * @param <code>String</code> the namespace
556 * @param <code>String</code> the label of the metadata
557 */
558 public String getCollectionMetadata(String namespace, String label)
559 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
560 }
[5798]561
[5943]562 /**
563 * Set the collection metadata item in the given namespace
564 *
565 * @param <code>String</code> the namespace
566 * @param <code>String</code> the label
567 * @param <code>String</code> the value
568 */
569 public void setCollectionMetadata(String namespace, String label, String value)
570 { this.metadata.setCollectionMetadata(namespace, label, value);
571 }
[8610]572
573 public String getCollectionName() {
574 return collectionName;
575 }
[8869]576
577/**
578 * @return
579 */
580public String getNotifyHost() {
581 return notifyHost;
[5943]582}
[8869]583}
[5798]584
Note: See TracBrowser for help on using the repository browser.