1 | package org.greenstone.gsdl3.gs3build;
|
---|
2 |
|
---|
3 | import org.greenstone.gsdl3.gs3build.doctypes.*;
|
---|
4 | import org.greenstone.gsdl3.gs3build.indexers.*;
|
---|
5 | import org.greenstone.gsdl3.gs3build.notifier.NotifierManager;
|
---|
6 | import org.greenstone.gsdl3.gs3build.extractor.*;
|
---|
7 | import org.greenstone.gsdl3.gs3build.classifier.*;
|
---|
8 | import org.greenstone.gsdl3.gs3build.collection.*;
|
---|
9 |
|
---|
10 | import java.io.File;
|
---|
11 | import java.util.List;
|
---|
12 |
|
---|
13 | public class BuildManager
|
---|
14 | {
|
---|
15 | CollectionManager collectionManager;
|
---|
16 | RecogniserManager recogniserManager;
|
---|
17 | IndexerManager indexerManager;
|
---|
18 | ClassifierManager classifierManager;
|
---|
19 | ExtractorManager extractorManager;
|
---|
20 | NotifierManager notifierManager;
|
---|
21 | DocumentList docList;
|
---|
22 | List inputRoots;
|
---|
23 | String outputDir;
|
---|
24 | String archiveDir;
|
---|
25 |
|
---|
26 | public BuildManager(List inputRoots, String site, String collection, String outputDir)
|
---|
27 | {
|
---|
28 | // set up the collection manager
|
---|
29 | this.collectionManager = new CollectionManager(site, collection);
|
---|
30 | this.collectionManager.setBuildManager(this);
|
---|
31 |
|
---|
32 | // set up the list of input directories
|
---|
33 | if (inputRoots.size() == 0) {
|
---|
34 | inputRoots.add(this.collectionManager.getImportDirectory());
|
---|
35 | System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
|
---|
36 | }
|
---|
37 | this.inputRoots = inputRoots;
|
---|
38 |
|
---|
39 | // set up the output directories (building and archive)
|
---|
40 | this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
|
---|
41 | File outputDirFile = new File(this.outputDir);
|
---|
42 | if (!outputDirFile.isDirectory()) {
|
---|
43 | System.err.println("Unable to open directory " + this.outputDir + " for writing");
|
---|
44 | System.exit(1);
|
---|
45 | }
|
---|
46 | System.out.println("Output directory set to " + this.outputDir);
|
---|
47 |
|
---|
48 | // set up the archive directory - this will definitely exist
|
---|
49 | this.archiveDir = this.collectionManager.getArchiveDirectory();
|
---|
50 |
|
---|
51 | // the list that documents get added to when they are recognised
|
---|
52 | this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
|
---|
53 | this.collectionManager.getDatabase());
|
---|
54 |
|
---|
55 | // set up the various managers
|
---|
56 | this.recogniserManager = new RecogniserManager(this.docList);
|
---|
57 | this.extractorManager = new ExtractorManager(this.docList);
|
---|
58 | this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
|
---|
59 | this.indexerManager = new IndexerManager(this.docList);
|
---|
60 | this.notifierManager = new NotifierManager();
|
---|
61 |
|
---|
62 | // configure the collection - this will add classifiers, indexers, recognisers to the various managers.
|
---|
63 | this.collectionManager.configureCollection();
|
---|
64 |
|
---|
65 | // add in expanders by hand cos they are not in the config file yet
|
---|
66 | // TODO
|
---|
67 | // add in extractors by hand cos they are not in the config file yet
|
---|
68 | this.extractorManager.addExtractor(new IndexExtractor());
|
---|
69 | this.extractorManager.addExtractor(new MetaXMLExtractor());
|
---|
70 |
|
---|
71 | // add in enrichers by hand cos they are not in the config file yet
|
---|
72 | // TODO
|
---|
73 |
|
---|
74 | // prepare a file crawler on the etc directory, and a crawl observer to respond to
|
---|
75 | // the appropriate files.
|
---|
76 | FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
|
---|
77 | new CollectionCrawlObserver(this));
|
---|
78 |
|
---|
79 | fileCrawler.crawl();
|
---|
80 | }
|
---|
81 |
|
---|
82 | public String getLastBuildSequence()
|
---|
83 | {
|
---|
84 | return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
|
---|
85 | }
|
---|
86 |
|
---|
87 | public ClassifierManager getClassifierManager()
|
---|
88 | {
|
---|
89 | return this.classifierManager;
|
---|
90 | }
|
---|
91 |
|
---|
92 | public IndexerManager getIndexerManager()
|
---|
93 | {
|
---|
94 | return this.indexerManager;
|
---|
95 | }
|
---|
96 |
|
---|
97 | // TODO: validation phase
|
---|
98 | public RecogniserManager getRecogniserManager()
|
---|
99 | {
|
---|
100 | return this.recogniserManager;
|
---|
101 | }
|
---|
102 |
|
---|
103 | public void addIndexer(IndexerInterface iface)
|
---|
104 | {
|
---|
105 | iface.configure(IndexerManager.outputDir, this.outputDir);
|
---|
106 | this.indexerManager.addIndexer(iface);
|
---|
107 | }
|
---|
108 |
|
---|
109 | public void run()
|
---|
110 | {
|
---|
111 | // Initialise collection manager - note start of build
|
---|
112 | this.collectionManager.startBuild();
|
---|
113 |
|
---|
114 | // TODO: add expansion (e.g. Zip files)
|
---|
115 |
|
---|
116 | // Crawl the file tree - will recognise documents
|
---|
117 | for (int i = 0; i < this.inputRoots.size(); i ++) {
|
---|
118 | FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
|
---|
119 |
|
---|
120 | fileCrawler.crawl();
|
---|
121 | }
|
---|
122 |
|
---|
123 | // Extract phase, etc.
|
---|
124 | this.extractorManager.extractDocuments();
|
---|
125 | this.classifierManager.classifyDocuments();
|
---|
126 | this.indexerManager.indexDocuments();
|
---|
127 |
|
---|
128 | // Timestamp management - update all timestamps on modified dates...
|
---|
129 | //
|
---|
130 | // This should only occur at the end of building in case the build is cancelled...
|
---|
131 | docList.updateTimestamps(this.collectionManager.getBuildTimestamp());
|
---|
132 |
|
---|
133 | // TODO: validation phase
|
---|
134 | this.notifierManager.detectEvents(this.collectionManager);
|
---|
135 |
|
---|
136 | if (this.archiveDir != null) {
|
---|
137 | this.docList.writeDocuments(new File(this.archiveDir));
|
---|
138 | //this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
|
---|
139 | }
|
---|
140 |
|
---|
141 | this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
|
---|
142 |
|
---|
143 | // TODO: write out collection configuration file (service lists, etc.)
|
---|
144 |
|
---|
145 | this.collectionManager.endBuild();
|
---|
146 | }
|
---|
147 |
|
---|
148 | public void addInputDirectory(String inputDir)
|
---|
149 | {
|
---|
150 | this.inputRoots.add(inputDir);
|
---|
151 | }
|
---|
152 | }
|
---|
153 |
|
---|
154 |
|
---|