source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8969

Last change on this file since 8969 was 8969, checked in by kjdon, 19 years ago

don't add in GS3Retrieve to service rack list anymore

  • Property svn:keywords set to Author Date Id Revision
File size: 18.0 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import java.sql.*;
22
23import javax.xml.parsers.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29import org.w3c.dom.NodeList;
30import org.w3c.dom.Text;
31
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34
35import org.greenstone.gsdl3.gs3build.database.*;
36import org.greenstone.gsdl3.gs3build.collection.*;
37import org.greenstone.gsdl3.gs3build.classifier.*;
38import org.greenstone.gsdl3.gs3build.indexers.*;
39import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface;
40
41import org.greenstone.gsdl3.gs3build.util.DOMUtils;
42import org.greenstone.gsdl3.gs3build.util.URLTools;
43
44import org.greenstone.gsdl3.util.GSFile;
45import org.greenstone.gsdl3.util.GSXML;
46import org.greenstone.gsdl3.util.XMLConverter;
47
48/**
49 * Store and hold collection-level configuration information for a collection.
50 * This should be used by BuildManager to work out which classes, etc. to load
51 * at build time, and as a repository for the collection-level metadata, and
52 * a means of loading and saving the same to a file or database, as is seen
53 * fit in the final development of gs3.
54 */
55
56public class CollectionManager
57{
// Instance state of the collection manager. All fields are package-private.
58 GregorianCalendar lastBuildDate; // date of the last build: read from the "build" table by the constructor (if present) and replaced by startBuild()
59 String adminEmail; // the email address of the administrator of the
60 // collection
61 int buildDocNo; // used to generate document identifiers
62 CollectionMetadata metadata; // collection-level metadata
63 GS3SQLConnection database; // the database to store everything in
64 String collectionHome; // collection base directory, from GSFile.collectionBaseDir(siteHome, collection)
65 String siteHome; // site directory, from GSFile.siteHome(GSDL3HOME, site)
66 String collectionName; // the collection name passed to the constructor
67 String qualifiedCollectionName; // used as the database name
68 String notifyHost; // value of the notify element's host attribute; set by configureCollection()
69
70 BuildManager buildManager; // supplied through setBuildManager(); used when configuring and when ending a build
71
72 class CollectionClassifier
73 { URL file;
74 String type;
75 List fields;
76 String sort;
77 String className;
78
79 public CollectionClassifier(URL parentURL, String type, String className, Node node)
80 { this.type = type;
81 this.className = className;
82 this.fields = new ArrayList();
83
84 NodeList children = node.getChildNodes();
85 for (int c = 0; c < children.getLength(); c ++) {
86 Node child = children.item(c);
87
88 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
89 String name = child.getNodeName();
90
91 if (name.equals("file")) {
92 NamedNodeMap atts = children.item(c).getAttributes();
93 Node attribute = atts.getNamedItem("URL");
94 String urlString = attribute.getNodeValue();
95 if (urlString == null)
96 continue;
97
98 System.out.println("Path is" + parentURL.getPath());
99 System.out.println("Host is" + parentURL.getHost());
100 System.out.println(urlString);
101
102 try {
103 URL url = new URL(parentURL, urlString);
104 this.file = url;
105 System.out.println(url);
106 }
107 catch (java.net.MalformedURLException malEx) {
108 System.out.println(malEx);
109 }
110 }
111 else if (name.equals("field")) {
112 String fieldName = DOMUtils.getNodeChildText(children.item(c));
113 this.fields.add(fieldName.toString());
114 }
115 else if (name.equals("sort")) {
116 String sortName = DOMUtils.getNodeChildText(children.item(c));
117 this.sort = sortName;
118 }
119 }
120 }
121 }
122
123 public ClassifierInterface getClassifier()
124 { ClassifierInterface classifier = null;
125
126 if (this.type == null) {
127 return null;
128 }
129 System.out.println("Creating a classifier of type " + this.type.toLowerCase());
130
131 if (this.type.toLowerCase().equals("hierarchy")) {
132 System.out.println(" hierarchy file is " + this.file);
133 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
134 }
135 else if (this.type.toLowerCase().equals("azlist")) {
136 classifier = new AZListClassifier(this.className, this.fields);
137 }
138
139 return classifier;
140 }
141 }
142
143 /**
144 * Create the collection manager for a given collection
145 *
146 * @param site the name of the site
147 * @param collection <code>String</code> the name of the collection
148 */
149 public CollectionManager(String site, String collection) {
150
151 String gsdl3Root = System.getProperty("GSDL3HOME");
152 if (gsdl3Root == null) {
153 System.out.println("Error: Unable to locate GSDL3HOME");
154 System.exit(1);
155 //return;
156 }
157
158 this.siteHome = GSFile.siteHome(gsdl3Root, site);
159 File site_dir = new File(this.siteHome);
160 System.out.println(site_dir);
161 if (!site_dir.exists()) {
162 System.out.println("Error: Non-existant site ("+site+") specified");
163 System.exit(1);
164 }
165 site_dir = null;
166 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
167
168 File collect_dir = new File(this.collectionHome);
169 if (!collect_dir.exists()) {
170 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
171 System.exit(1);
172 }
173 collect_dir = null;
174
175 this.collectionName = collection;
176 this.qualifiedCollectionName = site+"_"+collection;
177
178 this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName);
179 /* if (this.database != null) {
180 this.database.clearCollection(collection);
181 this.database = null;
182 }
183 */
184 if (this.database == null) {
185 this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test");
186 boolean success = this.database.initCollection(this.qualifiedCollectionName);
187 if (!success) {
188 System.err.println("couldn't init collection " + this.qualifiedCollectionName);
189 System.exit(1);
190 }
191 }
192
193 this.metadata = new CollectionMetadata();
194
195 File buildDirectory = new File(getBuildDirectory());
196 if (!buildDirectory.exists()) {
197 buildDirectory.mkdir();
198 }
199 if (!buildDirectory.isDirectory()) {
200 System.err.println("Unable to open directory " + buildDirectory + " for writing");
201 System.exit(1);
202 }
203
204 File archiveDirectory = new File(getArchiveDirectory());
205 if (!archiveDirectory.exists()) {
206 archiveDirectory.mkdir();
207 }
208 if (!archiveDirectory.isDirectory()) {
209 System.err.println("Unable to open directory " + archiveDirectory + " for writing");
210 System.exit(1);
211 }
212
213 this.buildDocNo = 1;
214
215 try {
216 GS3SQLSelect select = new GS3SQLSelect("build");
217 select.addField("*");
218 this.database.execute(select.toString());
219 ResultSet results = this.database.getResultSet();
220 if (results != null &&
221 results.first()) {
222 System.out.println("Reading all keys");
223 do {
224 String key = results.getString("buildKey");
225 String value = results.getString("buildValue");
226
227 if (key.equals("NextSeqNo")) {
228 this.buildDocNo = Integer.parseInt(value);
229 }
230 else if (key.equals("lastBuildDate")) {
231 int year, month, day;
232 year = Integer.parseInt(value.substring(0, 4));
233 month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc.
234 day = Integer.parseInt(value.substring(6, 8));
235
236 this.lastBuildDate = new GregorianCalendar(year, month, day);
237 }
238 } while (results.next());
239 }
240 }
241 catch (SQLException ex)
242 { System.out.println(ex);
243 }
244 }
245
246 public void setBuildManager(BuildManager build_man)
247 { this.buildManager = build_man;
248 }
249
250 private void configureBrowsers(Node node, File etcFile)
251 { CollectionClassifier classifier = null;
252 URL etcURL = null;
253
254 etcURL = URLTools.getFileURL(etcFile);
255
256 NodeList children = node.getChildNodes();
257 for (int c = 0; c < children.getLength(); c ++)
258 { // assume that non-element children are irrelevant
259 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
260 { continue;
261 }
262
263 String name = children.item(c).getNodeName();
264 System.out.println(name);
265
266 if (name.equals(GSXML.CLASSIFIER_ELEM))
267 { NamedNodeMap atts = children.item(c).getAttributes();
268
269 // get the type attribute
270 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
271 if (attribute == null) {
272 continue;
273 }
274 String type = attribute.getNodeValue();
275
276 // get the type attribute
277 attribute = atts.getNamedItem(GSXML.NAME_ATT);
278 if (attribute == null) {
279 continue;
280 }
281 String className = attribute.getNodeValue();
282
283 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
284
285 System.out.println("Found classifier " + type);
286
287 // attach the classifier
288 ClassifierInterface classify = classifier.getClassifier();
289 this.buildManager.getClassifierManager().addClassifier(classify);
290 }
291 }
292 }
293
294 public void configureCollection()
295 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
296
297 // get the File and read it in
298 try
299 {
300 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
301 DocumentBuilder builder = factory.newDocumentBuilder();
302 Document document = builder.parse(collectionConfig);
303
304 // TODO: report an error
305 if (document == null)
306 {
307 }
308
309 // now parse the manager file...
310 Element rootElement = document.getDocumentElement();
311
312 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
313 { // TODO: throw exception
314 }
315
316 System.out.println("Configuring collection");
317
318 NodeList children = rootElement.getChildNodes();
319 for (int c = 0; c < children.getLength(); c ++)
320 { // assume that non-element children are irrelevant
321 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
322 { continue;
323 }
324
325 String name = children.item(c).getNodeName();
326
327 // the name is a plugin element
328
329 if (name.equals(GSXML.RECOGNISE_ELEM)) {
330 System.out.println("recognise elem");
331
332 NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM);
333 for(int i=0; i<doc_types.getLength(); i++) {
334 Element doc_type = (Element)doc_types.item(i);
335 String type = doc_type.getAttribute(GSXML.NAME_ATT);
336 System.err.println("adding recogniser, type "+type);
337 RecogniserInterface ri = this.buildManager.getRecogniserManager().addRecogniser(type);
338 if (ri != null) {
339 ri.configure(doc_type);
340 }
341 }
342 } else if (name.equals(GSXML.SEARCH_ELEM)) {
343 // pick up attributes from the <search> tag now...
344 NamedNodeMap searchAttributes = children.item(c).getAttributes();
345 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
346 String searchType = null;
347 if (searchAttribute != null) {
348 searchType = searchAttribute.getNodeValue();
349 } else {
350 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
351 searchType = MGIndexer.MG_INDEX_TYPE;
352 }
353
354 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
355 String searchName = null;
356 if (searchAttribute != null) {
357 searchName = searchAttribute.getNodeValue();
358 }
359 if (searchName == null) {
360 searchName = "idx"; // need to modify this if we have two search elements with no names
361 }
362 // create the pertinent indexer...
363 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
364
365 if (indexer == null) {
366 continue;
367 }
368
369 // configure the indexer
370 indexer.configure(children.item(c));
371
372 // install it into the build manager
373 this.buildManager.addIndexer(indexer);
374 }
375 else if (name.equals(GSXML.BROWSE_ELEM))
376 { this.configureBrowsers(children.item(c), collectionConfig);
377 }
378 else if (name.equals(GSXML.NOTIFY_ELEM))
379 {
380 this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT);
381 }
382 // TODO: other elements - make a factory-method approach here...
383 else
384 {
385 }
386 }
387 }
388 catch (FactoryConfigurationError e) {
389 System.out.println(e);
390 }
391 catch (ParserConfigurationException ex) {
392 System.out.println(ex);
393 }
394 catch (SAXException ex) {
395 System.out.println(ex);
396 }
397 catch (IOException ex)
398 {
399 System.out.println(ex);
400 }
401
402 System.out.println("<<<Obtaining database>>>>");
403 }
404
405 public String getEtcDirectory()
406 { return GSFile.collectionEtcDir(this.collectionHome);
407 }
408
409 public String getImportDirectory()
410 { return GSFile.collectionImportDir(this.collectionHome);
411 }
412
413 public String getBuildDirectory()
414 { return GSFile.collectionBuildDir(this.collectionHome);
415 }
416
417 public String getArchiveDirectory()
418 { return GSFile.collectionArchiveDir(this.collectionHome);
419 }
420
421 public GS3SQLConnection getDatabase()
422 {
423 return this.database;
424 }
425
426 public long getBuildTimestamp()
427 { return this.lastBuildDate.getTime().getTime();
428 }
429
430 public Date getBuildDate()
431 { return this.lastBuildDate.getTime();
432 }
433
434 public void startBuild()
435 { GregorianCalendar today = new GregorianCalendar();
436
437 if (this.lastBuildDate != null)
438 { // if the build date is different to the last build date, then reset the build
439 // document number
440 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
441 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
442 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
443 { this.buildDocNo = 1;
444 }
445 else
446 { System.out.println("Continuing build sequence from " + this.buildDocNo);
447 }
448 }
449 this.lastBuildDate = today;
450 }
451
452 public void endBuild()
453 {
454 // here we write out the build config file
455 // create the xml for the buildConfig
456 XMLConverter converter = new XMLConverter();
457 Document doc = converter.newDOM();
458 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
459 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
460 build_config.appendChild(meta_list);
461 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
462 build_config.appendChild(service_list);
463
464 // ask the indexers and classifiers to add stuff into the service rack list
465 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
466 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
467 // get the String
468 String build_config_string = converter.getString(build_config);
469 // write it to the file
470 try {
471 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
472 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
473 writer.write(build_config_string, 0, build_config_string.length());
474 writer.close();
475 } catch (Exception e) {
476 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
477 System.err.println(e.getMessage());
478 }
479
480 // Update build date information
481 GS3SQLDelete remove = new GS3SQLDelete("build");
482 // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo"));
483 // rem
484 this.database.execute(remove.toString());
485
486 GS3SQLInsert insert = new GS3SQLInsert("build");
487 insert.addValue("buildKey", "NextSeqNo");
488 insert.addValue("buildValue", Integer.toString(this.buildDocNo));
489 this.database.execute(insert.toString());
490
491 insert = new GS3SQLInsert("build");
492 insert.addValue("buildKey", "lastBuildDate");
493 insert.addValue("buildValue", getDateString(this.lastBuildDate));
494 this.database.execute(insert.toString());
495
496 // Do tail of build output
497 Date startDate = this.lastBuildDate.getTime();
498 Date date = new Date();
499
500 long startTime = startDate.getTime();
501 long endTime = date.getTime();
502
503 long difference = ((endTime - startTime) + 500) / 1000;
504
505 System.out.println("Build completed");
506 System.out.println("---------------");
507 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
508 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
509 }
510
511 private static String getDateString(GregorianCalendar date)
512 { StringBuffer dateString = new StringBuffer();
513
514 int value;
515 dateString.append(date.get(Calendar.YEAR));
516
517 // the use of month is a little odd, hence the following
518 // code. Calendar.MONTH yields 0 = January, 1 = February,
519 // etc. hence there is a '+1' added to the month to make
520 // it into January = 1, etc., and the padding is altered
521 // correspondingly.
522 value = date.get(Calendar.MONTH);
523 if (value < 9)
524 { dateString.append("0");
525 }
526 dateString.append(value + 1);
527 value = date.get(Calendar.DAY_OF_MONTH);
528 if (value < 10)
529 dateString.append("0");
530 dateString.append(value);
531
532 return dateString.toString();
533 }
534
535
536 public String getNextDocumentID()
537 { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate));
538
539 int value = this.buildDocNo;
540 this.buildDocNo ++;
541
542 ID.append(":");
543 ID.append(Integer.toString(value));
544 return ID.toString();
545 }
546
547 public int getDocumentNumber()
548 { this.buildDocNo ++;
549 return this.buildDocNo - 1;
550 }
551
552 /**
553 * Get the collection metadata item in the given namespace
554 *
555 * @param <code>String</code> the namespace
556 * @param <code>String</code> the label of the metadata
557 */
558 public String getCollectionMetadata(String namespace, String label)
559 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
560 }
561
562 /**
563 * Set the collection metadata item in the given namespace
564 *
565 * @param <code>String</code> the namespace
566 * @param <code>String</code> the label
567 * @param <code>String</code> the value
568 */
569 public void setCollectionMetadata(String namespace, String label, String value)
570 { this.metadata.setCollectionMetadata(namespace, label, value);
571 }
572
573 public String getCollectionName() {
574 return collectionName;
575 }
576
577/**
578 * @return
579 */
580public String getNotifyHost() {
581 return notifyHost;
582}
583}
584
Note: See TracBrowser for help on using the repository browser.