source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 8869

Last change on this file since 8869 was 8869, checked in by schweer, 19 years ago

Notifications will only be sent if the collectionConfig.xml of the collection has an entry <notify host=hostId/>, where hostId is the name and port of the host the notifications should be sent to (in most cases this will be localhost:8080). Note that the alerting service (/research/schweer/gsdl3/packages/gsdl-as) has to be deployed at /alerting for this to work, and SOAP for localsite has to be enabled.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.notifier.NotifierManager;
6import org.greenstone.gsdl3.gs3build.extractor.*;
7import org.greenstone.gsdl3.gs3build.classifier.*;
8import org.greenstone.gsdl3.gs3build.collection.*;
9
10import java.io.File;
11import java.util.List;
12
13public class BuildManager
14{
15 CollectionManager collectionManager;
16 RecogniserManager recogniserManager;
17 IndexerManager indexerManager;
18 ClassifierManager classifierManager;
19 ExtractorManager extractorManager;
20 NotifierManager notifierManager;
21 DocumentList docList;
22 List inputRoots;
23 String outputDir;
24 String archiveDir;
25
26 public BuildManager(List inputRoots, String site, String collection, String outputDir)
27 {
28 // set up the collection manager
29 this.collectionManager = new CollectionManager(site, collection);
30 this.collectionManager.setBuildManager(this);
31
32 // set up the list of input directories
33 if (inputRoots.size() == 0) {
34 inputRoots.add(this.collectionManager.getImportDirectory());
35 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
36 }
37 this.inputRoots = inputRoots;
38
39 // set up the output directories (building and archive)
40 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
41 File outputDirFile = new File(this.outputDir);
42 if (!outputDirFile.isDirectory()) {
43 System.err.println("Unable to open directory " + this.outputDir + " for writing");
44 System.exit(1);
45 }
46 System.out.println("Output directory set to " + this.outputDir);
47
48 // set up the archive directory - this will definitely exist
49 this.archiveDir = this.collectionManager.getArchiveDirectory();
50
51 // the list that documents get added to when they are recognised
52 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
53 this.collectionManager.getDatabase());
54
55 // set up the various managers
56 this.recogniserManager = new RecogniserManager(this.docList);
57 this.extractorManager = new ExtractorManager(this.docList);
58 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
59 this.indexerManager = new IndexerManager(this.docList);
60 this.notifierManager = new NotifierManager();
61
62 // configure the collection - this will add classifiers, indexers, recognisers to the various managers.
63 this.collectionManager.configureCollection();
64
65 // add in expanders by hand cos they are not in the config file yet
66 // TODO
67 // add in extractors by hand cos they are not in the config file yet
68 this.extractorManager.addExtractor(new IndexExtractor());
69 this.extractorManager.addExtractor(new MetaXMLExtractor());
70
71 // add in enrichers by hand cos they are not in the config file yet
72 // TODO
73
74 // prepare a file crawler on the etc directory, and a crawl observer to respond to
75 // the appropriate files.
76 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
77 new CollectionCrawlObserver(this));
78
79 fileCrawler.crawl();
80 }
81
82 public String getLastBuildSequence()
83 {
84 return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
85 }
86
87 public ClassifierManager getClassifierManager()
88 {
89 return this.classifierManager;
90 }
91
92 public IndexerManager getIndexerManager()
93 {
94 return this.indexerManager;
95 }
96
97 // TODO: validation phase
98 public RecogniserManager getRecogniserManager()
99 {
100 return this.recogniserManager;
101 }
102
103 public void addIndexer(IndexerInterface iface)
104 {
105 iface.configure(IndexerManager.outputDir, this.outputDir);
106 this.indexerManager.addIndexer(iface);
107 }
108
109 public void run()
110 {
111 // Initialise collection manager - note start of build
112 this.collectionManager.startBuild();
113
114 // TODO: add expansion (e.g. Zip files)
115
116 // Crawl the file tree - will recognise documents
117 for (int i = 0; i < this.inputRoots.size(); i ++) {
118 FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
119
120 fileCrawler.crawl();
121 }
122
123 // Extract phase, etc.
124 this.extractorManager.extractDocuments();
125 this.classifierManager.classifyDocuments();
126 this.indexerManager.indexDocuments();
127
128 // Timestamp management - update all timestamps on modified dates...
129 //
130 // This should only occur at the end of building in case the build is cancelled...
131 docList.updateTimestamps(this.collectionManager.getBuildTimestamp());
132
133 // TODO: validation phase
134 this.notifierManager.detectEvents(this.collectionManager);
135
136 if (this.archiveDir != null) {
137 this.docList.writeDocuments(new File(this.archiveDir));
138 //this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
139 }
140
141 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
142
143 // TODO: write out collection configuration file (service lists, etc.)
144
145 this.collectionManager.endBuild();
146 }
147
148 public void addInputDirectory(String inputDir)
149 {
150 this.inputRoots.add(inputDir);
151 }
152}
153
154
Note: See TracBrowser for help on using the repository browser.