1 | package org.greenstone.gsdl3.gs3build;
|
---|
2 |
|
---|
3 | import org.greenstone.gsdl3.gs3build.doctypes.*;
|
---|
4 | import org.greenstone.gsdl3.gs3build.indexers.*;
|
---|
5 |
|
---|
6 | import java.io.File;
|
---|
7 | import java.util.List;
|
---|
8 |
|
---|
9 | public class BuildManager
|
---|
10 | {
|
---|
11 | CollectionManager collectionManager;
|
---|
12 | RecogniserManager recogniserManager;
|
---|
13 | IndexerManager indexerManager;
|
---|
14 | ExtractorManager extractorManager;
|
---|
15 | DocumentList docList;
|
---|
16 | List inputRoots;
|
---|
17 | String outputDir;
|
---|
18 |
|
---|
19 | public BuildManager(List inputRoots, String collection, String outputDir)
|
---|
20 | {
|
---|
21 | // get the collection configuration information
|
---|
22 | this.collectionManager = new CollectionManager(collection);
|
---|
23 |
|
---|
24 | // set up the list of input directories
|
---|
25 | if (inputRoots.size() == 0) {
|
---|
26 | inputRoots.add(this.collectionManager.getImportDirectory());
|
---|
27 | System.out.println("Adding collection import directory to input list");
|
---|
28 | }
|
---|
29 | this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager));
|
---|
30 | this.inputRoots = inputRoots;
|
---|
31 |
|
---|
32 | // set up the output directory
|
---|
33 | this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
|
---|
34 | File outputDirFile = new File(this.outputDir);
|
---|
35 | if (!outputDirFile.exists() || !outputDirFile.isDirectory())
|
---|
36 | { System.err.println("Unable to open directory " + this.outputDir + " for writing");
|
---|
37 | System.exit(1);
|
---|
38 | }
|
---|
39 | System.out.println("Output directory set to " + this.outputDir);
|
---|
40 |
|
---|
41 | // initialise any recognisers
|
---|
42 | this.recogniserManager = new RecogniserManager();
|
---|
43 | RecogniserInterface recogniser = new HTMLRecogniser(docList);
|
---|
44 | recogniserManager.addRecogniser(recogniser);
|
---|
45 | recogniser = new TextRecogniser(docList);
|
---|
46 | recogniserManager.addRecogniser(recogniser);
|
---|
47 |
|
---|
48 | // set up the extractors
|
---|
49 | this.extractorManager = new ExtractorManager(this.docList);
|
---|
50 |
|
---|
51 | // set up the enrichers
|
---|
52 |
|
---|
53 | // set up the classifiers
|
---|
54 |
|
---|
55 | // set up the indexers
|
---|
56 | this.indexerManager = new IndexerManager(this.docList);
|
---|
57 | IndexerInterface iface = new MGIndexer();
|
---|
58 | iface.configure(this.outputDir);
|
---|
59 | this.indexerManager.addIndexer(iface);
|
---|
60 | }
|
---|
61 |
|
---|
62 | public String getLastBuildSequence()
|
---|
63 | { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
|
---|
64 | }
|
---|
65 |
|
---|
66 | public void run()
|
---|
67 | {
|
---|
68 | this.collectionManager.startBuild();
|
---|
69 |
|
---|
70 | for (int i = 0; i < this.inputRoots.size(); i ++)
|
---|
71 | { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
|
---|
72 |
|
---|
73 | fileCrawler.crawl();
|
---|
74 | }
|
---|
75 | this.indexerManager.indexDocuments();
|
---|
76 |
|
---|
77 | this.docList.writeDocuments(new File(outputDir));
|
---|
78 | this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
|
---|
79 |
|
---|
80 | this.collectionManager.endBuild();
|
---|
81 | }
|
---|
82 |
|
---|
83 | public void addInputDirectory(String inputDir)
|
---|
84 | { this.inputRoots.add(inputDir);
|
---|
85 | }
|
---|
86 | }
|
---|