1 | package org.greenstone.gsdl3.gs3build;
|
---|
2 |
|
---|
3 | import org.greenstone.gsdl3.gs3build.doctypes.*;
|
---|
4 | import org.greenstone.gsdl3.gs3build.indexers.*;
|
---|
5 | import org.greenstone.gsdl3.gs3build.extractor.*;
|
---|
6 |
|
---|
7 | import java.io.File;
|
---|
8 | import java.util.List;
|
---|
9 |
|
---|
10 | public class BuildManager
|
---|
11 | {
|
---|
12 | CollectionManager collectionManager;
|
---|
13 | RecogniserManager recogniserManager;
|
---|
14 | IndexerManager indexerManager;
|
---|
15 | ExtractorManager extractorManager;
|
---|
16 | DocumentList docList;
|
---|
17 | List inputRoots;
|
---|
18 | String outputDir;
|
---|
19 |
|
---|
20 | public BuildManager(List inputRoots, String collection, String outputDir)
|
---|
21 | {
|
---|
22 | // get the collection configuration information
|
---|
23 | this.collectionManager = new CollectionManager(collection);
|
---|
24 |
|
---|
25 | // set up the list of input directories
|
---|
26 | if (inputRoots.size() == 0) {
|
---|
27 | inputRoots.add(this.collectionManager.getImportDirectory());
|
---|
28 | System.out.println("Adding collection import directory to input list");
|
---|
29 | }
|
---|
30 | this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager));
|
---|
31 | this.inputRoots = inputRoots;
|
---|
32 |
|
---|
33 | // set up the output directory
|
---|
34 | this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
|
---|
35 | File outputDirFile = new File(this.outputDir);
|
---|
36 | if (!outputDirFile.exists() || !outputDirFile.isDirectory())
|
---|
37 | { System.err.println("Unable to open directory " + this.outputDir + " for writing");
|
---|
38 | System.exit(1);
|
---|
39 | }
|
---|
40 | System.out.println("Output directory set to " + this.outputDir);
|
---|
41 |
|
---|
42 | // initialise any recognisers
|
---|
43 | this.recogniserManager = new RecogniserManager();
|
---|
44 | RecogniserInterface recogniser = new HTMLRecogniser(docList);
|
---|
45 | recogniserManager.addRecogniser(recogniser);
|
---|
46 | recogniser = new TextRecogniser(docList);
|
---|
47 | recogniserManager.addRecogniser(recogniser);
|
---|
48 |
|
---|
49 | // set up the extractors
|
---|
50 | this.extractorManager = new ExtractorManager(this.docList);
|
---|
51 |
|
---|
52 | // set up the enrichers
|
---|
53 |
|
---|
54 | // set up the classifiers
|
---|
55 |
|
---|
56 | // set up the indexers
|
---|
57 | this.indexerManager = new IndexerManager(this.docList);
|
---|
58 | IndexerInterface iface = new MGIndexer();
|
---|
59 | iface.configure(this.outputDir);
|
---|
60 | this.indexerManager.addIndexer(iface);
|
---|
61 | }
|
---|
62 |
|
---|
63 | public String getLastBuildSequence()
|
---|
64 | { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
|
---|
65 | }
|
---|
66 |
|
---|
67 | public void run()
|
---|
68 | {
|
---|
69 | this.collectionManager.startBuild();
|
---|
70 |
|
---|
71 | for (int i = 0; i < this.inputRoots.size(); i ++)
|
---|
72 | { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
|
---|
73 |
|
---|
74 | fileCrawler.crawl();
|
---|
75 | }
|
---|
76 | this.indexerManager.indexDocuments();
|
---|
77 |
|
---|
78 | this.docList.writeDocuments(new File(outputDir));
|
---|
79 | this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
|
---|
80 |
|
---|
81 | this.collectionManager.endBuild();
|
---|
82 | }
|
---|
83 |
|
---|
84 | public void addInputDirectory(String inputDir)
|
---|
85 | { this.inputRoots.add(inputDir);
|
---|
86 | }
|
---|
87 | }
|
---|