source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 6503

Last change on this file since 6503 was 6454, checked in by cs025, 20 years ago

Added MetaXMLExtractor extractor, etc.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6import org.greenstone.gsdl3.gs3build.classifier.*;
7import org.greenstone.gsdl3.gs3build.collection.*;
8
9import java.io.File;
10import java.util.List;
11
12public class BuildManager
13{
14 CollectionManager collectionManager;
15 RecogniserManager recogniserManager;
16 IndexerManager indexerManager;
17 ClassifierManager classifierManager;
18 ExtractorManager extractorManager;
19 DocumentList docList;
20 List inputRoots;
21 String outputDir;
22 String archiveDir;
23
24 public BuildManager(List inputRoots, String collection, String outputDir)
25 {
26 // get the collection configuration information
27 this.collectionManager = new CollectionManager(collection);
28
29 // set up the list of input directories
30 if (inputRoots.size() == 0) {
31 inputRoots.add(this.collectionManager.getImportDirectory());
32 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
33 }
34 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
35 this.collectionManager.getDatabase());
36 this.inputRoots = inputRoots;
37
38 // set up the output directory
39 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
40 File outputDirFile = new File(this.outputDir);
41 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
42 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
43 System.exit(1);
44 }
45 System.out.println("Output directory set to " + this.outputDir);
46
47 this.archiveDir = this.collectionManager.getArchiveDirectory();
48
49 // TODO: initialise the expanders here...
50
51 // initialise any recognisers
52 this.recogniserManager = new RecogniserManager();
53 RecogniserInterface recogniser = new HTMLRecogniser(docList);
54 recogniserManager.addRecogniser(recogniser);
55 recogniserManager.addRecogniser(new IndexRecogniser(docList));
56 recogniser = new TextRecogniser(docList);
57 recogniserManager.addRecogniser(recogniser);
58 recogniser = new MetadataRecogniser(docList);
59 recogniserManager.addRecogniser(recogniser);
60
61 // set up the extractors
62 this.extractorManager = new ExtractorManager(this.docList);
63 this.extractorManager.addExtractor(new IndexExtractor());
64 this.extractorManager.addExtractor(new MetaXMLExtractor());
65
66 // TODO: set up the enrichers...
67
68 // set up the classifiers
69 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
70
71 // set up the indexers
72 this.indexerManager = new IndexerManager(this.docList);
73
74 // configure the collection
75 this.collectionManager.configureCollection(this);
76
77 // prepare a file crawler on the etc directory, and a crawl observer to respond to
78 // the appropriate files.
79 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
80 new CollectionCrawlObserver(this));
81
82 fileCrawler.crawl();
83 }
84
85 public String getLastBuildSequence()
86 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
87 }
88
89 public ClassifierManager getClassifierManager()
90 { return this.classifierManager;
91 }
92
93 public IndexerManager getIndexerManager()
94 { return this.indexerManager;
95 }
96
97 public void addIndexer(IndexerInterface iface)
98 { iface.configure(IndexerManager.outputDir, this.outputDir);
99 this.indexerManager.addIndexer(iface);
100 }
101
102 public void run()
103 {
104 this.collectionManager.startBuild();
105
106 for (int i = 0; i < this.inputRoots.size(); i ++)
107 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
108
109 fileCrawler.crawl();
110 }
111 this.indexerManager.indexDocuments();
112 this.extractorManager.extractDocuments();
113 this.classifierManager.classifyDocuments();
114
115 if (this.archiveDir != null) {
116 this.docList.writeDocuments(new File(this.archiveDir));
117 // this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
118 }
119
120 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
121
122 this.collectionManager.endBuild();
123 }
124
125 public void addInputDirectory(String inputDir)
126 { this.inputRoots.add(inputDir);
127 }
128}
129
130
Note: See TracBrowser for help on using the repository browser.