source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 12188

Last change on this file since 12188 was 12188, checked in by kjdon, 18 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 18.5 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import java.sql.*;
22
23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
35import org.greenstone.gsdl3.gs3build.database.*;
36import org.greenstone.gsdl3.gs3build.collection.*;
37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
40
41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
42import org.greenstone.gsdl3.gs3build.util.URLTools;
43
44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
46import org.greenstone.gsdl3.util.XMLConverter;
47
/**
 * Stores and holds the collection-level configuration information for a
 * collection. BuildManager should use this class to work out which classes,
 * etc. to load at build time. It also acts as a repository for the
 * collection-level metadata, and as a means of loading and saving that
 * metadata to a file or database, as is seen fit in the final development
 * of gs3.
 */
55
56public class CollectionManager
57{
58 GregorianCalendar lastBuildDate; // pretty obvious
59 String adminEmail; // the email address of the administrator of the
60 // collection
61 int buildDocNo; // used to generate document identifiers
62 CollectionMetadata metadata; // collection-level metadata
63 GS3SQLConnection database; // the database to store everything in
64 String collectionHome;
65 String siteHome;
66 String collectionName;
67 String qualifiedCollectionName; // used as the database name
68 String notifyHost;
69
70 BuildManager buildManager;
71
72 class CollectionClassifier
73 { URL file;
74 String type;
75 List fields;
76 String sort;
77 String className;
78
79 public CollectionClassifier(URL parentURL, String type, String className, Node node)
80 { this.type = type;
81 this.className = className;
82 this.fields = new ArrayList();
83
84 NodeList children = node.getChildNodes();
85 for (int c = 0; c < children.getLength(); c ++) {
86 Node child = children.item(c);
87
88 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
89 String name = child.getNodeName();
90
91 if (name.equals("file")) {
92 NamedNodeMap atts = children.item(c).getAttributes();
93 Node attribute = atts.getNamedItem("URL");
94 String urlString = attribute.getNodeValue();
95 if (urlString == null)
96 continue;
97
98 System.out.println("Path is" + parentURL.getPath());
99 System.out.println("Host is" + parentURL.getHost());
100 System.out.println(urlString);
101
102 try {
103 URL url = new URL(parentURL, urlString);
104 this.file = url;
105 System.out.println(url);
106 }
107 catch (java.net.MalformedURLException malEx) {
108 System.out.println(malEx);
109 }
110 }
111 else if (name.equals("field")) {
112 String fieldName = DOMUtils.getNodeChildText(children.item(c));
113 this.fields.add(fieldName.toString());
114 }
115 else if (name.equals("sort")) {
116 String sortName = DOMUtils.getNodeChildText(children.item(c));
117 this.sort = sortName;
118 }
119 }
120 }
121 }
122
123 public ClassifierInterface getClassifier()
124 { ClassifierInterface classifier = null;
125
126 if (this.type == null) {
127 return null;
128 }
129 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
130
131 if (this.type.toLowerCase().equals("hierarchy")) {
132 System.out.println(" hierarchy file is " + this.file);
133 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
134 }
135 else if (this.type.toLowerCase().equals("azlist")) {
136 classifier = new AZListClassifier(this.className, this.fields);
137 }
138
139 return classifier;
140 }
141 }
142
143 /**
144 * Create the collection manager for a given collection
145 *
146 * @param site the name of the site
147 * @param collection <code>String</code> the name of the collection
148 */
149 public CollectionManager(String site, String collection) {
150
151 String gsdl3Root = System.getProperty("GSDL3HOME");
152 if (gsdl3Root == null) {
153 System.out.println("Error: Unable to locate GSDL3HOME");
154 System.exit(1);
155 //return;
156 }
157
158 this.siteHome = GSFile.siteHome(gsdl3Root, site);
159 File site_dir = new File(this.siteHome);
160 System.out.println(site_dir);
161 if (!site_dir.exists()) {
162 System.out.println("Error: Non-existant site ("+site+") specified");
163 System.exit(1);
164 }
165 site_dir = null;
166 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
167
168 File collect_dir = new File(this.collectionHome);
169 if (!collect_dir.exists()) {
170 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
171 System.exit(1);
172 }
173 collect_dir = null;
174
175 this.collectionName = collection;
176 this.qualifiedCollectionName = site+"_"+collection;
177
178 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
179 /* if (this.database != null) {
180 this.database.clearCollection(collection);
181 this.database = null;
182 }
183 */
184 if (this.database == null) {
185 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
186 if (this.database == null) {
187 System.err.println("Can't connect to the mysql database. Please make sure your mysql is running, and that the correct passwords (if any) are specified in "+gsdl3Root+"/WEB-INF/classes/global.properties");
188 System.exit(1);
189 }
190 boolean success = this.database.initCollection(this.qualifiedCollectionName);
191 if (!success) {
192 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
193 System.exit(1);
194 }
195 }
196
197 this.metadata = new CollectionMetadata();
198
199 File buildDirectory = new File(getBuildDirectory());
200 if (!buildDirectory.exists()) {
201 buildDirectory.mkdir();
202 }
203 if (!buildDirectory.isDirectory()) {
204 System.err.println("Unable to open directory " + buildDirectory + " for writing");
205 System.exit(1);
206 }
207
208 File archiveDirectory = new File(getArchiveDirectory());
209 if (!archiveDirectory.exists()) {
210 archiveDirectory.mkdir();
211 }
212 if (!archiveDirectory.isDirectory()) {
213 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
214 System.exit(1);
215 }
216
217 this.buildDocNo = 1;
218
219 try {
220 GS3SQLSelect select = new GS3SQLSelect("build");
221 select.addField("*");
222 Statement statement = this.database.createStatement();
223 ResultSet results = statement.executeQuery(select.toString());
224 if (results.first()) {
225 System.out.println("Reading all keys");
226 do {
227 String key = results.getString("buildKey");
228 String value = results.getString("buildValue");
229
230 if (key.equals("NextSeqNo")) {
231 this.buildDocNo = Integer.parseInt(value);
232 }
233 else if (key.equals("lastBuildDate")) {
234 int year, month, day;
235 year = Integer.parseInt(value.substring(0, 4));
236 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
237 day = Integer.parseInt(value.substring(6, 8));
238
239 this.lastBuildDate = new GregorianCalendar(year, month, day);
240 }
241 } while (results.next());
242 }
243 statement.close();
244 }
245 catch (SQLException ex)
246 { System.out.println(ex);
247 }
248 }
249
250 public void setBuildManager(BuildManager build_man)
251 { this.buildManager = build_man;
252 }
253
254 private void configureBrowsers(Node node, File etcFile)
255 { CollectionClassifier classifier = null;
256 URL etcURL = null;
257
258 etcURL = URLTools.getFileURL(etcFile);
259
260 NodeList children = node.getChildNodes();
261 for (int c = 0; c < children.getLength(); c ++)
262 { // assume that non-element children are irrelevant
263 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
264 { continue;
265 }
266
267 String name = children.item(c).getNodeName();
268 System.out.println(name);
269
270 if (name.equals(GSXML.CLASSIFIER_ELEM))
271 { NamedNodeMap atts = children.item(c).getAttributes();
272
273 // get the type attribute
274 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
275 if (attribute == null) {
276 continue;
277 }
278 String type = attribute.getNodeValue();
279
280 // get the type attribute
281 attribute = atts.getNamedItem(GSXML.NAME_ATT);
282 if (attribute == null) {
283 continue;
284 }
285 String className = attribute.getNodeValue();
286
287 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
288
289 System.out.println("Found classifier " + type);
290
291 // attach the classifier
292 ClassifierInterface classify = classifier.getClassifier();
293 this.buildManager.getClassifierManager().addClassifier(classify);
294 }
295 }
296 }
297
298 public void configureCollection()
299 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
300
301 // get the File and read it in
302 try
303 {
304 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
305 DocumentBuilder builder = factory.newDocumentBuilder();
306 Document document = builder.parse(collectionConfig);
307
308 // TODO: report an error
309 if (document == null)
310 {
311 }
312
313 // now parse the manager file...
314 Element rootElement = document.getDocumentElement();
315
316 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
317 { // TODO: throw exception
318 }
319
320 System.out.println("Configuring collection");
321
322 NodeList children = rootElement.getChildNodes();
323 for (int c = 0; c < children.getLength(); c ++)
324 { // assume that non-element children are irrelevant
325 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
326 { continue;
327 }
328
329 String name = children.item(c).getNodeName();
330
331 // the name is a plugin element
332
333 if (name.equals(GSXML.RECOGNISE_ELEM)) {
334 System.out.println("recognise elem");
335
336 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
337 for(int i=0; i<doc_types.getLength(); i++) {
338 Element doc_type = (Element)doc_types.item(i);
339 String type = doc_type.getAttribute(GSXML.NAME_ATT);
340 System.err.println("adding recogniser, type "+type);
341 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
342 if (ri != null) {
343 ri.configure(doc_type);
344 }
345 }
346 } else if (name.equals(GSXML.SEARCH_ELEM)) {
347 // pick up attributes from the <search> tag now...
348 NamedNodeMap searchAttributes = children.item(c).getAttributes();
349 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
350 String searchType = null;
351 if (searchAttribute != null) {
352 searchType = searchAttribute.getNodeValue();
353 } else {
354 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
355 searchType = MGIndexer.MG_INDEX_TYPE;
356 }
357
358 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
359 String searchName = null;
360 if (searchAttribute != null) {
361 searchName = searchAttribute.getNodeValue();
362 }
363 if (searchName == null) {
364 searchName = "idx"; // need to modify this if we have two search elements with no names
365 }
366 // create the pertinent indexer...
367 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
368
369 if (indexer == null) {
370 continue;
371 }
372
373 // configure the indexer
374 indexer.configure(children.item(c));
375
376 // install it into the build manager
377 this.buildManager.addIndexer(indexer);
378 }
379 else if (name.equals(GSXML.BROWSE_ELEM))
380 { this.configureBrowsers(children.item(c), collectionConfig);
381 }
382 else if (name.equals(GSXML.NOTIFY_ELEM))
383 {
384 this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT);
385 }
386 // TODO: other elements - make a factory-method approach here...
387 else
388 {
389 }
390 }
391 }
392 catch (FactoryConfigurationError e) {
393 System.out.println(e);
394 }
395 catch (ParserConfigurationException ex) {
396 System.out.println(ex);
397 }
398 catch (SAXException ex) {
399 System.out.println(ex);
400 }
401 catch (IOException ex)
402 {
403 System.out.println(ex);
404 }
405
406 System.out.println("<<<Obtaining database>>>>");
407 }
408
409 public String getEtcDirectory()
410 { return GSFile.collectionEtcDir(this.collectionHome);
411 }
412
413 public String getImportDirectory()
414 { return GSFile.collectionImportDir(this.collectionHome);
415 }
416
417 public String getBuildDirectory()
418 { return GSFile.collectionBuildDir(this.collectionHome);
419 }
420
421 public String getArchiveDirectory()
422 { return GSFile.collectionArchiveDir(this.collectionHome);
423 }
424
425 public GS3SQLConnection getDatabase()
426 {
427 return this.database;
428 }
429
430 public long getBuildTimestamp()
431 { return this.lastBuildDate.getTime().getTime();
432 }
433
434 public Date getBuildDate()
435 { return this.lastBuildDate.getTime();
436 }
437
438 public void startBuild()
439 { GregorianCalendar today = new GregorianCalendar();
440
441 if (this.lastBuildDate != null)
442 { // if the build date is different to the last build date, then reset the build
443 // document number
444 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
445 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
446 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
447 { this.buildDocNo = 1;
448 }
449 else
450 { System.out.println("Continuing build sequence from " + this.buildDocNo);
451 }
452 }
453 this.lastBuildDate = today;
454 }
455
456 public void endBuild()
457 {
458 // here we write out the build config file
459 // create the xml for the buildConfig
460 XMLConverter converter = new XMLConverter();
461 Document doc = converter.newDOM();
462 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
463 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
464 build_config.appendChild(meta_list);
465 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
466 build_config.appendChild(service_list);
467
468 // ask the indexers and classifiers to add stuff into the service rack list
469 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
470 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
471 // get the String
472 String build_config_string = converter.getString(build_config);
473 // write it to the file
474 try {
475 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
476 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
477 writer.write(build_config_string, 0, build_config_string.length());
478 writer.close();
479 } catch (Exception e) {
480 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
481 System.err.println(e.getMessage());
482 }
483
484 // Update build date information
485 try {
486 GS3SQLDelete remove = new GS3SQLDelete("build");
487 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
488 // rem
489 Statement statement = this.database.createStatement();
490 statement.execute(remove.toString());
491
492 GS3SQLInsert insert = new GS3SQLInsert("build");
493 insert.addValue("buildKey", "NextSeqNo");
494 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
495 statement.execute(insert.toString());
496
497 insert = new GS3SQLInsert("build");
498 insert.addValue("buildKey", "lastBuildDate");
499 insert.addValue("buildValue", getDateString(this.lastBuildDate));
500 statement.execute(insert.toString());
501 statement.close();
502 } catch (SQLException e) {
503 System.err.println("CollectionManager.endBuild(): Can't update build information: "+e);
504 }
505
506 // Do tail of build output
507 Date startDate = this.lastBuildDate.getTime();
508 Date date = new Date();
509
510 long startTime = startDate.getTime();
511 long endTime = date.getTime();
512
513 long difference = ((endTime - startTime) + 500) / 1000;
514
515 System.out.println("Build completed");
516 System.out.println("---------------");
517 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
518 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
519 }
520
521 private static String getDateString(GregorianCalendar date)
522 { StringBuffer dateString = new StringBuffer();
523
524 int value;
525 dateString.append(date.get(Calendar.YEAR));
526
527 // the use of month is a little odd, hence the following
528 // code. Calendar.MONTH yields 0 = January, 1 = February,
529 // etc. hence there is a '+1' added to the month to make
530 // it into January = 1, etc., and the padding is altered
531 // correspondingly.
532 value = date.get(Calendar.MONTH);
533 if (value < 9)
534 { dateString.append("0");
535 }
536 dateString.append(value + 1);
537 value = date.get(Calendar.DAY_OF_MONTH);
538 if (value < 10)
539 dateString.append("0");
540 dateString.append(value);
541
542 return dateString.toString();
543 }
544
545
546 public String getNextDocumentID()
547 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
548
549 int value = this.buildDocNo;
550 this.buildDocNo ++;
551
552 ID.append(":");
553 ID.append(Integer.toString(value));
554 return ID.toString();
555 }
556
557 public int getDocumentNumber()
558 { this.buildDocNo ++;
559 return this.buildDocNo - 1;
560 }
561
562 /**
563 * Get the collection metadata item in the given namespace
564 *
565 * @param <code>String</code> the namespace
566 * @param <code>String</code> the label of the metadata
567 */
568 public String getCollectionMetadata(String namespace, String label)
569 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
570 }
571
572 /**
573 * Set the collection metadata item in the given namespace
574 *
575 * @param <code>String</code> the namespace
576 * @param <code>String</code> the label
577 * @param <code>String</code> the value
578 */
579 public void setCollectionMetadata(String namespace, String label, String value)
580 { this.metadata.setCollectionMetadata(namespace, label, value);
581 }
582
583 public String getCollectionName() {
584 return collectionName;
585 }
586
587/**
588 * @return
589 */
590public String getNotifyHost() {
591 return notifyHost;
592}
593}
594
Note: See TracBrowser for help on using the repository browser.