source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 5823

Last change on this file since 5823 was 5823, checked in by cs025, 20 years ago

Fixed missing import of extractors

  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6
7import java.io.File;
8import java.util.List;
9
10public class BuildManager
11{
12 CollectionManager collectionManager;
13 RecogniserManager recogniserManager;
14 IndexerManager indexerManager;
15 ExtractorManager extractorManager;
16 DocumentList docList;
17 List inputRoots;
18 String outputDir;
19
20 public BuildManager(List inputRoots, String collection, String outputDir)
21 {
22 // get the collection configuration information
23 this.collectionManager = new CollectionManager(collection);
24
25 // set up the list of input directories
26 if (inputRoots.size() == 0) {
27 inputRoots.add(this.collectionManager.getImportDirectory());
28 System.out.println("Adding collection import directory to input list");
29 }
30 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager));
31 this.inputRoots = inputRoots;
32
33 // set up the output directory
34 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
35 File outputDirFile = new File(this.outputDir);
36 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
37 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
38 System.exit(1);
39 }
40 System.out.println("Output directory set to " + this.outputDir);
41
42 // initialise any recognisers
43 this.recogniserManager = new RecogniserManager();
44 RecogniserInterface recogniser = new HTMLRecogniser(docList);
45 recogniserManager.addRecogniser(recogniser);
46 recogniser = new TextRecogniser(docList);
47 recogniserManager.addRecogniser(recogniser);
48
49 // set up the extractors
50 this.extractorManager = new ExtractorManager(this.docList);
51
52 // set up the enrichers
53
54 // set up the classifiers
55
56 // set up the indexers
57 this.indexerManager = new IndexerManager(this.docList);
58 IndexerInterface iface = new MGIndexer();
59 iface.configure(this.outputDir);
60 this.indexerManager.addIndexer(iface);
61 }
62
63 public String getLastBuildSequence()
64 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
65 }
66
67 public void run()
68 {
69 this.collectionManager.startBuild();
70
71 for (int i = 0; i < this.inputRoots.size(); i ++)
72 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
73
74 fileCrawler.crawl();
75 }
76 this.indexerManager.indexDocuments();
77
78 this.docList.writeDocuments(new File(outputDir));
79 this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
80
81 this.collectionManager.endBuild();
82 }
83
84 public void addInputDirectory(String inputDir)
85 { this.inputRoots.add(inputDir);
86 }
87}
Note: See TracBrowser for help on using the repository browser.