source: branches/alerting-branch/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8416

Last change on this file since 8416 was 8416, checked in by schweer, 20 years ago

proof-of-concept implementation for detecting new documents

  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import javax.xml.parsers.*;
22
23import org.w3c.dom.Document;
24import org.w3c.dom.Element;
25import org.w3c.dom.NamedNodeMap;
26import org.w3c.dom.Node;
27import org.w3c.dom.NodeList;
28import org.w3c.dom.Text;
29
30import org.xml.sax.SAXException;
31import org.xml.sax.SAXParseException;
32
33import org.greenstone.gsdl3.gs3build.collection.*;
34import org.greenstone.gsdl3.gs3build.classifier.*;
35import org.greenstone.gsdl3.gs3build.indexers.*;
36
37import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
38import org.greenstone.gsdl3.gs3build.util.GS3SQLConnectionFactory;
39import org.greenstone.gsdl3.gs3build.util.DOMUtils;
40import org.greenstone.gsdl3.gs3build.util.URLTools;
41
42import org.greenstone.gsdl3.util.GSFile;
43import org.greenstone.gsdl3.util.GSXML;
44import org.greenstone.gsdl3.util.XMLConverter;
45
46/**
47 * Store and hold collection-level configuration information for a collection.
48 * This should be used by BuildManager to work out which classes, etc. to load
49 * at build time, and as a repository for the collection-level metadata, and
50 * a means of loading and saving the same to a file or database, as is seen
51 * fit in the final development of gs3.
52 */
53
54public class CollectionManager
55{
56 GregorianCalendar lastBuildDate; // pretty obvious
57 String adminEmail; // the email address of the administrator of the
58 // collection
59 int buildDocNo; // used to generate document identifiers
60 CollectionMetadata metadata; // collection-level metadata
61 GS3SQLConnection database; // the database to store everything in
62 String collectionHome;
63 String siteHome;
64 String collectionName;
65 String qualifiedCollectionName; // used as the database name
66
67 BuildManager buildManager;
68
69 class CollectionClassifier
70 { URL file;
71 String type;
72 List fields;
73 String sort;
74 String className;
75
76 public CollectionClassifier(URL parentURL, String type, String className, Node node)
77 { this.type = type;
78 this.className = className;
79 this.fields = new ArrayList();
80
81 NodeList children = node.getChildNodes();
82 for (int c = 0; c < children.getLength(); c ++) {
83 Node child = children.item(c);
84
85 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
86 String name = child.getNodeName();
87
88 if (name.equals("file")) {
89 NamedNodeMap atts = children.item(c).getAttributes();
90 Node attribute = atts.getNamedItem("URL");
91 String urlString = attribute.getNodeValue();
92 if (urlString == null)
93 continue;
94
95 System.out.println("Path is" + parentURL.getPath());
96 System.out.println("Host is" + parentURL.getHost());
97 System.out.println(urlString);
98
99 try {
100 URL url = new URL(parentURL, urlString);
101 this.file = url;
102 System.out.println(url);
103 }
104 catch (java.net.MalformedURLException malEx) {
105 System.out.println(malEx);
106 }
107 }
108 else if (name.equals("field")) {
109 String fieldName = DOMUtils.getNodeChildText(children.item(c));
110 this.fields.add(fieldName.toString());
111 }
112 else if (name.equals("sort")) {
113 String sortName = DOMUtils.getNodeChildText(children.item(c));
114 this.sort = sortName;
115 }
116 }
117 }
118 }
119
120 public ClassifierInterface getClassifier()
121 { ClassifierInterface classifier = null;
122
123 if (this.type == null) {
124 return null;
125 }
126 System.out.println(this.type.toLowerCase());
127
128 if (this.type.toLowerCase().equals("hierarchy")) {
129 System.out.println(this.file);
130 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
131 }
132 else if (this.type.toLowerCase().equals("azlist")) {
133 classifier = new AZListClassifier(this.className, this.fields);
134 }
135
136 return classifier;
137 }
138 }
139
140 /**
141 * Create the collection manager for a given collection
142 *
143 * @param site the name of the site
144 * @param collection <code>String</code> the name of the collection
145 */
146 public CollectionManager(String site, String collection) {
147
148 String gsdl3Root = System.getProperty("GSDL3HOME");
149 if (gsdl3Root == null) {
150 System.out.println("Error: Unable to locate GSDL3HOME");
151 System.exit(1);
152 //return;
153 }
154
155 this.siteHome = GSFile.siteHome(gsdl3Root, site);
156 File site_dir = new File(this.siteHome);
157 System.out.println(site_dir);
158 if (!site_dir.exists()) {
159 System.out.println("Error: Non-existant site ("+site+") specified");
160 System.exit(1);
161 }
162 site_dir = null;
163 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
164
165 File collect_dir = new File(this.collectionHome);
166 if (!collect_dir.exists()) {
167 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
168 System.exit(1);
169 }
170 collect_dir = null;
171
172 this.collectionName = collection;
173 this.qualifiedCollectionName = site+"_"+collection;
174
175 this.database = GS3SQLConnectionFactory.createConnection(this.qualifiedCollectionName);
176 /* if (this.database != null) {
177 this.database.clearCollection(collection);
178 this.database = null;
179 }
180 */
181 if (this.database == null) {
182 this.database = GS3SQLConnectionFactory.createConnection("test");
183 this.database.initCollection(this.qualifiedCollectionName);
184 }
185
186 this.metadata = new CollectionMetadata();
187
188 File buildDirectory = new File(GSFile.collectionBuildDir(this.collectionHome));
189 if (!buildDirectory.exists()) {
190 buildDirectory.mkdir();
191 }
192
193 File archiveDirectory = new File(GSFile.collectionArchiveDir(this.collectionHome));
194 if (!archiveDirectory.exists()) {
195 archiveDirectory.mkdir();
196 }
197
198 this.buildDocNo = 1;
199 }
200
201 public void setBuildManager(BuildManager build_man)
202 { this.buildManager = build_man;
203 }
204
205 private void configureBrowsers(Node node, File etcFile)
206 { CollectionClassifier classifier = null;
207 URL etcURL = null;
208
209 etcURL = URLTools.getFileURL(etcFile);
210
211 NodeList children = node.getChildNodes();
212 for (int c = 0; c < children.getLength(); c ++)
213 { // assume that non-element children are irrelevant
214 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
215 { continue;
216 }
217
218 String name = children.item(c).getNodeName();
219 System.out.println(name);
220
221 if (name.equals(GSXML.CLASSIFIER_ELEM))
222 { NamedNodeMap atts = children.item(c).getAttributes();
223
224 // get the type attribute
225 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
226 if (attribute == null) {
227 continue;
228 }
229 String type = attribute.getNodeValue();
230
231 // get the type attribute
232 attribute = atts.getNamedItem(GSXML.NAME_ATT);
233 if (attribute == null) {
234 continue;
235 }
236 String className = attribute.getNodeValue();
237
238 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
239
240 System.out.println("Found classifier " + type);
241
242 // attach the classifier
243 ClassifierInterface classify = classifier.getClassifier();
244 this.buildManager.getClassifierManager().addClassifier(classify);
245 }
246 }
247 }
248
249 public void configureCollection()
250 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
251
252 // get the File and read it in
253 try
254 {
255 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
256 DocumentBuilder builder = factory.newDocumentBuilder();
257 Document document = builder.parse(collectionConfig);
258
259 // TODO: report an error
260 if (document == null)
261 {
262 }
263
264 // now parse the manager file...
265 Element rootElement = document.getDocumentElement();
266
267 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
268 { // TODO: throw exception
269 }
270
271 System.out.println("Configuring collection");
272
273 NodeList children = rootElement.getChildNodes();
274 for (int c = 0; c < children.getLength(); c ++)
275 { // assume that non-element children are irrelevant
276 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
277 { continue;
278 }
279
280 String name = children.item(c).getNodeName();
281
282 // the name is a plugin element
283
284 if (name.equals("doctype")) {
285 System.out.println("document type");
286 NamedNodeMap typeAttributes = children.item(c).getAttributes();
287 Node typeAttribute = typeAttributes.getNamedItem("type");
288 String documentType = null;
289
290 NodeList childNodes = children.item(c).getChildNodes();
291 for (int n = 0; n < childNodes.getLength(); n ++)
292 { if (childNodes.item(n).getNodeType() == org.w3c.dom.Node.TEXT_NODE)
293 { String label = childNodes.item(n).getNodeValue();
294 label.trim();
295 if (label.length() > 0) {
296 documentType = label;
297 System.out.println("Document type " + documentType);
298 }
299 }
300 }
301 }
302 else if (name.equals(GSXML.SEARCH_ELEM)) {
303 // pick up attributes from the <search> tag now...
304 NamedNodeMap searchAttributes = children.item(c).getAttributes();
305 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
306 String searchType = null;
307 if (searchAttribute != null) {
308 searchType = searchAttribute.getNodeValue();
309 } else {
310 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
311 searchType = MGIndexer.MG_INDEX_TYPE;
312 }
313
314 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
315 String searchName = null;
316 if (searchAttribute != null) {
317 searchName = searchAttribute.getNodeValue();
318 }
319 if (searchName == null) {
320 searchName = "idx"; // need to modify this if we have two search elements with no names
321 }
322 // create the pertinent indexer...
323 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
324
325 if (indexer == null) {
326 continue;
327 }
328
329 // configure the indexer
330 indexer.configure(children.item(c));
331
332 // install it into the build manager
333 this.buildManager.addIndexer(indexer);
334 }
335 else if (name.equals(GSXML.BROWSE_ELEM))
336 { this.configureBrowsers(children.item(c), collectionConfig);
337 }
338 // TODO: other elements - make a factory-method approach here...
339 else
340 {
341 }
342 }
343 }
344 catch (FactoryConfigurationError e) {
345 System.out.println(e);
346 }
347 catch (ParserConfigurationException ex) {
348 System.out.println(ex);
349 }
350 catch (SAXException ex) {
351 System.out.println(ex);
352 }
353 catch (IOException ex)
354 {
355 System.out.println(ex);
356 }
357
358 System.out.println("<<<Obtaining database>>>>");
359 }
360
361 public String getEtcDirectory()
362
363 { return GSFile.collectionEtcDir(this.collectionHome);
364 }
365
366 public String getImportDirectory()
367 { return GSFile.collectionImportDir(this.collectionHome);
368 }
369
370 public String getBuildDirectory()
371 { return GSFile.collectionBuildDir(this.collectionHome);
372 }
373
374 public GregorianCalendar getLastBuildDate(){
375 return lastBuildDate;
376 }
377
378 public String getArchiveDirectory()
379 { return GSFile.collectionArchiveDir(this.collectionHome);
380 }
381
382 public GS3SQLConnection getDatabase()
383 {
384 return this.database;
385 }
386
387 public Date getBuildDate()
388 { return this.lastBuildDate.getTime();
389 }
390
391 public void startBuild()
392 { GregorianCalendar today = new GregorianCalendar();
393
394 if (this.lastBuildDate != null)
395 { // if the build date is different to the last build date, then reset the build
396 // document number
397 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
398 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
399 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
400 { this.buildDocNo = 1;
401 }
402 }
403 this.lastBuildDate = today;
404 }
405
406 public void endBuild()
407 {
408 // here we write out the build config file
409 // create the xml for the buildConfig
410 XMLConverter converter = new XMLConverter();
411 Document doc = converter.newDOM();
412 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
413 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
414 build_config.appendChild(meta_list);
415 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
416 build_config.appendChild(service_list);
417
418 // the document structure and metadata retrieval will use GS3REtrieve service, so add it in here
419 Element base_retrieve_service = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
420 base_retrieve_service.setAttribute(GSXML.NAME_ATT, "GS3Retrieve");
421 service_list.appendChild(base_retrieve_service);
422 // ask the indexers to add stuff into the service rack list
423 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
424 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
425 // get the String
426 String build_config_string = converter.getString(build_config);
427 // write it to the file
428 try {
429 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
430 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
431 writer.write(build_config_string, 0, build_config_string.length());
432 writer.close();
433 } catch (Exception e) {
434 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
435 System.err.println(e.getMessage());
436 }
437 Date startDate = this.lastBuildDate.getTime();
438 Date date = new Date();
439
440 long startTime = startDate.getTime();
441 long endTime = date.getTime();
442
443 long difference = ((endTime - startTime) + 500) / 1000;
444
445 System.out.println("Build completed");
446 System.out.println("---------------");
447 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
448 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
449 }
450
451 public String getNextDocumentID()
452 { StringBuffer ID = new StringBuffer();
453
454 int value;
455 ID.append(lastBuildDate.get(Calendar.YEAR));
456
457 // the use of month is a little odd, hence the following
458 // code. Calendar.MONTH yields 0 = January, 1 = February,
459 // etc. hence there is a '+1' added to the month to make
460 // it into January = 1, etc., and the padding is altered
461 // correspondingly.
462 value = lastBuildDate.get(Calendar.MONTH);
463 if (value < 9)
464 { ID.append("0");
465 }
466 ID.append(value + 1);
467 value = lastBuildDate.get(Calendar.DAY_OF_MONTH);
468 if (value < 10)
469 ID.append("0");
470 ID.append(value);
471
472
473 value = this.buildDocNo;
474 this.buildDocNo ++;
475
476 ID.append(":");
477 ID.append(Integer.toString(value));
478 return ID.toString();
479 }
480
481 public int getDocumentNumber()
482 { this.buildDocNo ++;
483 return this.buildDocNo - 1;
484 }
485
486 /**
487 * Get the collection metadata item in the given namespace
488 *
489 * @param <code>String</code> the namespace
490 * @param <code>String</code> the label of the metadata
491 */
492 public String getCollectionMetadata(String namespace, String label)
493 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
494 }
495
496 /**
497 * Set the collection metadata item in the given namespace
498 *
499 * @param <code>String</code> the namespace
500 * @param <code>String</code> the label
501 * @param <code>String</code> the value
502 */
503 public void setCollectionMetadata(String namespace, String label, String value)
504 { this.metadata.setCollectionMetadata(namespace, label, value);
505 }
506}
507
Note: See TracBrowser for help on using the repository browser.