source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java@ 8408

Last change on this file since 8408 was 8408, checked in by schweer, 20 years ago

George's changes to detect documents that are new or have changed since the last build process. (his CVS account currently doesn't work)

  • Property svn:keywords set to Author Date Id Revision
File size: 15.6 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import java.util.Date;
4import java.util.Calendar;
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Map;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.GregorianCalendar;
11
12import java.io.File;
13import java.io.IOException;
14import java.io.FileOutputStream;
15import java.io.BufferedWriter;
16import java.io.OutputStreamWriter;
17
18import java.net.URL;
19import java.net.URLEncoder;
20
21import javax.xml.parsers.*;
22
23import org.w3c.dom.Document;
24import org.w3c.dom.Element;
25import org.w3c.dom.NamedNodeMap;
26import org.w3c.dom.Node;
27import org.w3c.dom.NodeList;
28import org.w3c.dom.Text;
29
30import org.xml.sax.SAXException;
31import org.xml.sax.SAXParseException;
32
33import org.greenstone.gsdl3.gs3build.collection.*;
34import org.greenstone.gsdl3.gs3build.classifier.*;
35import org.greenstone.gsdl3.gs3build.indexers.*;
36
37import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
38import org.greenstone.gsdl3.gs3build.util.GS3SQLConnectionFactory;
39import org.greenstone.gsdl3.gs3build.util.DOMUtils;
40import org.greenstone.gsdl3.gs3build.util.URLTools;
41
42import org.greenstone.gsdl3.util.GSFile;
43import org.greenstone.gsdl3.util.GSXML;
44import org.greenstone.gsdl3.util.XMLConverter;
45
46/**
47 * Store and hold collection-level configuration information for a collection.
48 * This should be used by BuildManager to work out which classes, etc. to load
49 * at build time, and as a repository for the collection-level metadata, and
50 * a means of loading and saving the same to a file or database, as is seen
51 * fit in the final development of gs3.
52 */
53
54public class CollectionManager
55{
56 GregorianCalendar lastBuildDate; // pretty obvious
57 String adminEmail; // the email address of the administrator of the
58 // collection
59 int buildDocNo; // used to generate document identifiers
60 CollectionMetadata metadata; // collection-level metadata
61 GS3SQLConnection database; // the database to store everything in
62 String collectionHome;
63 String siteHome;
64 String collectionName;
65 String qualifiedCollectionName; // used as the database name
66
67 BuildManager buildManager;
68
69 class CollectionClassifier
70 { URL file;
71 String type;
72 List fields;
73 String sort;
74 String className;
75
76 public CollectionClassifier(URL parentURL, String type, String className, Node node)
77 { this.type = type;
78 this.className = className;
79 this.fields = new ArrayList();
80
81 NodeList children = node.getChildNodes();
82 for (int c = 0; c < children.getLength(); c ++) {
83 Node child = children.item(c);
84
85 if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
86 String name = child.getNodeName();
87
88 if (name.equals("file")) {
89 NamedNodeMap atts = children.item(c).getAttributes();
90 Node attribute = atts.getNamedItem("URL");
91 String urlString = attribute.getNodeValue();
92 if (urlString == null)
93 continue;
94
95 System.out.println("Path is" + parentURL.getPath());
96 System.out.println("Host is" + parentURL.getHost());
97 System.out.println(urlString);
98
99 try {
100 URL url = new URL(parentURL, urlString);
101 this.file = url;
102 System.out.println(url);
103 }
104 catch (java.net.MalformedURLException malEx) {
105 System.out.println(malEx);
106 }
107 }
108 else if (name.equals("field")) {
109 String fieldName = DOMUtils.getNodeChildText(children.item(c));
110 this.fields.add(fieldName.toString());
111 }
112 else if (name.equals("sort")) {
113 String sortName = DOMUtils.getNodeChildText(children.item(c));
114 this.sort = sortName;
115 }
116 }
117 }
118 }
119
120 public ClassifierInterface getClassifier()
121 { ClassifierInterface classifier = null;
122
123 if (this.type == null) {
124 return null;
125 }
126 System.out.println(this.type.toLowerCase());
127
128 if (this.type.toLowerCase().equals("hierarchy")) {
129 System.out.println(this.file);
130 classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
131 }
132 else if (this.type.toLowerCase().equals("azlist")) {
133 classifier = new AZListClassifier(this.className, this.fields);
134 }
135
136 return classifier;
137 }
138 }
139
140 /**
141 * Create the collection manager for a given collection
142 *
143 * @param site the name of the site
144 * @param collection <code>String</code> the name of the collection
145 */
146 public CollectionManager(String site, String collection) {
147
148 String gsdl3Root = System.getProperty("GSDL3HOME");
149 if (gsdl3Root == null) {
150 System.out.println("Error: Unable to locate GSDL3HOME");
151 System.exit(1);
152 //return;
153 }
154
155 this.siteHome = GSFile.siteHome(gsdl3Root, site);
156 File site_dir = new File(this.siteHome);
157 System.out.println(site_dir);
158 if (!site_dir.exists()) {
159 System.out.println("Error: Non-existant site ("+site+") specified");
160 System.exit(1);
161 }
162 site_dir = null;
163 this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
164
165 File collect_dir = new File(this.collectionHome);
166 if (!collect_dir.exists()) {
167 System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
168 System.exit(1);
169 }
170 collect_dir = null;
171
172 this.collectionName = collection;
173 this.qualifiedCollectionName = site+"_"+collection;
174
175 this.database = GS3SQLConnectionFactory.createConnection(this.qualifiedCollectionName);
176 /* if (this.database != null) {
177 this.database.clearCollection(collection);
178 this.database = null;
179 }
180 */
181 if (this.database == null) {
182 this.database = GS3SQLConnectionFactory.createConnection("test");
183 this.database.initCollection(this.qualifiedCollectionName);
184 }
185
186 this.metadata = new CollectionMetadata();
187
188 File buildDirectory = new File(GSFile.collectionBuildDir(this.collectionHome));
189 if (!buildDirectory.exists()) {
190 buildDirectory.mkdir();
191 }
192
193 File archiveDirectory = new File(GSFile.collectionArchiveDir(this.collectionHome));
194 if (!archiveDirectory.exists()) {
195 archiveDirectory.mkdir();
196 }
197
198 this.buildDocNo = 1;
199 }
200
201 public void setBuildManager(BuildManager build_man)
202 { this.buildManager = build_man;
203 }
204
205 private void configureBrowsers(Node node, File etcFile)
206 { CollectionClassifier classifier = null;
207 URL etcURL = null;
208
209 etcURL = URLTools.getFileURL(etcFile);
210
211 NodeList children = node.getChildNodes();
212 for (int c = 0; c < children.getLength(); c ++)
213 { // assume that non-element children are irrelevant
214 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
215 { continue;
216 }
217
218 String name = children.item(c).getNodeName();
219 System.out.println(name);
220
221 if (name.equals(GSXML.CLASSIFIER_ELEM))
222 { NamedNodeMap atts = children.item(c).getAttributes();
223
224 // get the type attribute
225 Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
226 if (attribute == null) {
227 continue;
228 }
229 String type = attribute.getNodeValue();
230
231 // get the type attribute
232 attribute = atts.getNamedItem(GSXML.NAME_ATT);
233 if (attribute == null) {
234 continue;
235 }
236 String className = attribute.getNodeValue();
237
238 classifier = new CollectionClassifier(etcURL, type, className, children.item(c));
239
240 System.out.println("Found classifier " + type);
241
242 // attach the classifier
243 ClassifierInterface classify = classifier.getClassifier();
244 this.buildManager.getClassifierManager().addClassifier(classify);
245 }
246 }
247 }
248
249 public void configureCollection()
250 { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));
251
252 // get the File and read it in
253 try
254 {
255 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
256 DocumentBuilder builder = factory.newDocumentBuilder();
257 Document document = builder.parse(collectionConfig);
258
259 // TODO: report an error
260 if (document == null)
261 {
262 }
263
264 // now parse the manager file...
265 Element rootElement = document.getDocumentElement();
266
267 if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM)
268 { // TODO: throw exception
269 }
270
271 System.out.println("Configuring collection");
272
273 NodeList children = rootElement.getChildNodes();
274 for (int c = 0; c < children.getLength(); c ++)
275 { // assume that non-element children are irrelevant
276 if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE)
277 { continue;
278 }
279
280 String name = children.item(c).getNodeName();
281
282 // the name is a plugin element
283
284 if (name.equals("doctype")) {
285 System.out.println("document type");
286 NamedNodeMap typeAttributes = children.item(c).getAttributes();
287 Node typeAttribute = typeAttributes.getNamedItem("type");
288 String documentType = null;
289
290 NodeList childNodes = children.item(c).getChildNodes();
291 for (int n = 0; n < childNodes.getLength(); n ++)
292 { if (childNodes.item(n).getNodeType() == org.w3c.dom.Node.TEXT_NODE)
293 { String label = childNodes.item(n).getNodeValue();
294 label.trim();
295 if (label.length() > 0) {
296 documentType = label;
297 System.out.println("Document type " + documentType);
298 }
299 }
300 }
301 }
302 else if (name.equals(GSXML.SEARCH_ELEM)) {
303 // pick up attributes from the <search> tag now...
304 NamedNodeMap searchAttributes = children.item(c).getAttributes();
305 Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
306 String searchType = null;
307 if (searchAttribute != null) {
308 searchType = searchAttribute.getNodeValue();
309 } else {
310 System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
311 searchType = MGIndexer.MG_INDEX_TYPE;
312 }
313
314 searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
315 String searchName = null;
316 if (searchAttribute != null) {
317 searchName = searchAttribute.getNodeValue();
318 }
319 if (searchName == null) {
320 searchName = "idx"; // need to modify this if we have two search elements with no names
321 }
322 // create the pertinent indexer...
323 IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
324
325 if (indexer == null) {
326 continue;
327 }
328
329 // configure the indexer
330 indexer.configure(children.item(c));
331
332 // install it into the build manager
333 this.buildManager.addIndexer(indexer);
334 }
335 else if (name.equals(GSXML.BROWSE_ELEM))
336 { this.configureBrowsers(children.item(c), collectionConfig);
337 }
338 // TODO: other elements - make a factory-method approach here...
339 else
340 {
341 }
342 }
343 }
344 catch (FactoryConfigurationError e) {
345 System.out.println(e);
346 }
347 catch (ParserConfigurationException ex) {
348 System.out.println(ex);
349 }
350 catch (SAXException ex) {
351 System.out.println(ex);
352 }
353 catch (IOException ex)
354 {
355 System.out.println(ex);
356 }
357
358 System.out.println("<<<Obtaining database>>>>");
359 }
360
361 public String getEtcDirectory()
362
363 { return GSFile.collectionEtcDir(this.collectionHome);
364 }
365
366 public String getImportDirectory()
367 { return GSFile.collectionImportDir(this.collectionHome);
368 }
369
370 public String getBuildDirectory()
371 { return GSFile.collectionBuildDir(this.collectionHome);
372 }
373
374 public String getArchiveDirectory()
375 { return GSFile.collectionArchiveDir(this.collectionHome);
376 }
377
378 public GS3SQLConnection getDatabase()
379 {
380 return this.database;
381 }
382
383 public Date getBuildDate()
384 { return this.lastBuildDate.getTime();
385 }
386
387 public void startBuild()
388 { GregorianCalendar today = new GregorianCalendar();
389
390 if (this.lastBuildDate != null)
391 { // if the build date is different to the last build date, then reset the build
392 // document number
393 if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) ||
394 today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) ||
395 today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH))
396 { this.buildDocNo = 1;
397 }
398 }
399 this.lastBuildDate = today;
400 }
401
402 public void endBuild()
403 {
404 // here we write out the build config file
405 // create the xml for the buildConfig
406 XMLConverter converter = new XMLConverter();
407 Document doc = converter.newDOM();
408 Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
409 Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
410 build_config.appendChild(meta_list);
411 Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
412 build_config.appendChild(service_list);
413
414 // the document structure and metadata retrieval will use GS3REtrieve service, so add it in here
415 Element base_retrieve_service = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
416 base_retrieve_service.setAttribute(GSXML.NAME_ATT, "GS3Retrieve");
417 service_list.appendChild(base_retrieve_service);
418 // ask the indexers to add stuff into the service rack list
419 this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
420 this.buildManager.getClassifierManager().addServiceDescriptions(service_list);
421 // get the String
422 String build_config_string = converter.getString(build_config);
423 // write it to the file
424 try {
425 File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
426 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
427 writer.write(build_config_string, 0, build_config_string.length());
428 writer.close();
429 } catch (Exception e) {
430 System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
431 System.err.println(e.getMessage());
432 }
433 Date startDate = this.lastBuildDate.getTime();
434 Date date = new Date();
435
436 long startTime = startDate.getTime();
437 long endTime = date.getTime();
438
439 long difference = ((endTime - startTime) + 500) / 1000;
440
441 System.out.println("Build completed");
442 System.out.println("---------------");
443 System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
444 System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
445 }
446
447 public String getNextDocumentID()
448 { StringBuffer ID = new StringBuffer();
449
450 int value;
451 ID.append(lastBuildDate.get(Calendar.YEAR));
452
453 // the use of month is a little odd, hence the following
454 // code. Calendar.MONTH yields 0 = January, 1 = February,
455 // etc. hence there is a '+1' added to the month to make
456 // it into January = 1, etc., and the padding is altered
457 // correspondingly.
458 value = lastBuildDate.get(Calendar.MONTH);
459 if (value < 9)
460 { ID.append("0");
461 }
462 ID.append(value + 1);
463 value = lastBuildDate.get(Calendar.DAY_OF_MONTH);
464 if (value < 10)
465 ID.append("0");
466 ID.append(value);
467
468
469 value = this.buildDocNo;
470 this.buildDocNo ++;
471
472 ID.append(":");
473 ID.append(Integer.toString(value));
474 return ID.toString();
475 }
476
477 public int getDocumentNumber()
478 { this.buildDocNo ++;
479 return this.buildDocNo - 1;
480 }
481
482 /**
483 * Get the collection metadata item in the given namespace
484 *
485 * @param <code>String</code> the namespace
486 * @param <code>String</code> the label of the metadata
487 */
488 public String getCollectionMetadata(String namespace, String label)
489 { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
490 }
491
492 /**
493 * Set the collection metadata item in the given namespace
494 *
495 * @param <code>String</code> the namespace
496 * @param <code>String</code> the label
497 * @param <code>String</code> the value
498 */
499 public void setCollectionMetadata(String namespace, String label, String value)
500 { this.metadata.setCollectionMetadata(namespace, label, value);
501 }
502}
503
Note: See TracBrowser for help on using the repository browser.