1 | package org.greenstone.gsdl3.gs3build;
|
---|
2 |
|
---|
3 | import org.greenstone.gsdl3.gs3build.doctypes.*;
|
---|
4 | import org.greenstone.gsdl3.gs3build.indexers.*;
|
---|
5 | import org.greenstone.gsdl3.gs3build.extractor.*;
|
---|
6 | import org.greenstone.gsdl3.gs3build.classifier.*;
|
---|
7 | import org.greenstone.gsdl3.gs3build.collection.*;
|
---|
8 |
|
---|
9 | import java.io.File;
|
---|
10 | import java.util.List;
|
---|
11 |
|
---|
12 | public class BuildManager
|
---|
13 | {
|
---|
14 | CollectionManager collectionManager;
|
---|
15 | RecogniserManager recogniserManager;
|
---|
16 | IndexerManager indexerManager;
|
---|
17 | ClassifierManager classifierManager;
|
---|
18 | ExtractorManager extractorManager;
|
---|
19 | DocumentList docList;
|
---|
20 | List inputRoots;
|
---|
21 | String outputDir;
|
---|
22 | String archiveDir;
|
---|
23 |
|
---|
24 | public BuildManager(List inputRoots, String site, String collection, String outputDir)
|
---|
25 | {
|
---|
26 | // get the collection configuration information
|
---|
27 | this.collectionManager = new CollectionManager(site, collection);
|
---|
28 | this.collectionManager.setBuildManager(this);
|
---|
29 | // set up the list of input directories
|
---|
30 | if (inputRoots.size() == 0) {
|
---|
31 | inputRoots.add(this.collectionManager.getImportDirectory());
|
---|
32 | System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
|
---|
33 | }
|
---|
34 | this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
|
---|
35 | this.collectionManager.getDatabase());
|
---|
36 | this.inputRoots = inputRoots;
|
---|
37 |
|
---|
38 | // set up the output directory
|
---|
39 | this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
|
---|
40 | File outputDirFile = new File(this.outputDir);
|
---|
41 | if (!outputDirFile.exists() || !outputDirFile.isDirectory())
|
---|
42 | { System.err.println("Unable to open directory " + this.outputDir + " for writing");
|
---|
43 | System.exit(1);
|
---|
44 | }
|
---|
45 | System.out.println("Output directory set to " + this.outputDir);
|
---|
46 |
|
---|
47 | this.archiveDir = this.collectionManager.getArchiveDirectory();
|
---|
48 |
|
---|
49 | // TODO: initialise the expanders here...
|
---|
50 |
|
---|
51 | // initialise any recognisers
|
---|
52 | this.recogniserManager = new RecogniserManager();
|
---|
53 | RecogniserInterface recogniser = new HTMLRecogniser(docList);
|
---|
54 | recogniserManager.addRecogniser(recogniser);
|
---|
55 | recogniserManager.addRecogniser(new IndexRecogniser(docList));
|
---|
56 | recogniser = new TextRecogniser(docList);
|
---|
57 | recogniserManager.addRecogniser(recogniser);
|
---|
58 | recogniser = new MetadataRecogniser(docList);
|
---|
59 | recogniserManager.addRecogniser(recogniser);
|
---|
60 |
|
---|
61 | // set up the extractors
|
---|
62 | this.extractorManager = new ExtractorManager(this.docList);
|
---|
63 | this.extractorManager.addExtractor(new IndexExtractor());
|
---|
64 | this.extractorManager.addExtractor(new MetaXMLExtractor());
|
---|
65 |
|
---|
66 | // TODO: set up the enrichers...
|
---|
67 |
|
---|
68 | // set up the classifiers
|
---|
69 | this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
|
---|
70 |
|
---|
71 | // set up the indexers
|
---|
72 | this.indexerManager = new IndexerManager(this.docList);
|
---|
73 |
|
---|
74 | // configure the collection
|
---|
75 | this.collectionManager.configureCollection();
|
---|
76 |
|
---|
77 | // prepare a file crawler on the etc directory, and a crawl observer to respond to
|
---|
78 | // the appropriate files.
|
---|
79 | FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
|
---|
80 | new CollectionCrawlObserver(this));
|
---|
81 |
|
---|
82 | fileCrawler.crawl();
|
---|
83 | }
|
---|
84 |
|
---|
85 | public String getLastBuildSequence()
|
---|
86 | { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
|
---|
87 | }
|
---|
88 |
|
---|
89 | public ClassifierManager getClassifierManager()
|
---|
90 | { return this.classifierManager;
|
---|
91 | }
|
---|
92 |
|
---|
93 | public IndexerManager getIndexerManager()
|
---|
94 | { return this.indexerManager;
|
---|
95 | }
|
---|
96 |
|
---|
97 | public void addIndexer(IndexerInterface iface)
|
---|
98 | { iface.configure(IndexerManager.outputDir, this.outputDir);
|
---|
99 | this.indexerManager.addIndexer(iface);
|
---|
100 | }
|
---|
101 |
|
---|
102 | public void run()
|
---|
103 | { // Initialise collection manager - note start of build
|
---|
104 | this.collectionManager.startBuild();
|
---|
105 |
|
---|
106 | // TODO: add expansion (e.g. Zip files)
|
---|
107 |
|
---|
108 | for (int i = 0; i < this.inputRoots.size(); i ++)
|
---|
109 | { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
|
---|
110 |
|
---|
111 | fileCrawler.crawl();
|
---|
112 | }
|
---|
113 | this.extractorManager.extractDocuments();
|
---|
114 | this.classifierManager.classifyDocuments();
|
---|
115 | this.indexerManager.indexDocuments();
|
---|
116 |
|
---|
117 | // TODO: validation phase
|
---|
118 |
|
---|
119 | if (this.archiveDir != null) {
|
---|
120 | this.docList.writeDocuments(new File(this.archiveDir));
|
---|
121 | // this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
|
---|
122 | }
|
---|
123 |
|
---|
124 | this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
|
---|
125 |
|
---|
126 | // TODO: write out collection configuration file (service lists, etc.)
|
---|
127 |
|
---|
128 | this.collectionManager.endBuild();
|
---|
129 | }
|
---|
130 |
|
---|
131 | public void addInputDirectory(String inputDir)
|
---|
132 | { this.inputRoots.add(inputDir);
|
---|
133 | }
|
---|
134 | }
|
---|
135 |
|
---|
136 |
|
---|