source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 6868

Last change on this file since 6868 was 6868, checked in by kjdon, 20 years ago

Now uses the site name as well as the collection name.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6import org.greenstone.gsdl3.gs3build.classifier.*;
7import org.greenstone.gsdl3.gs3build.collection.*;
8
9import java.io.File;
10import java.util.List;
11
12public class BuildManager
13{
14 CollectionManager collectionManager;
15 RecogniserManager recogniserManager;
16 IndexerManager indexerManager;
17 ClassifierManager classifierManager;
18 ExtractorManager extractorManager;
19 DocumentList docList;
20 List inputRoots;
21 String outputDir;
22 String archiveDir;
23
24 public BuildManager(List inputRoots, String site, String collection, String outputDir)
25 {
26 // get the collection configuration information
27 this.collectionManager = new CollectionManager(site, collection);
28
29 // set up the list of input directories
30 if (inputRoots.size() == 0) {
31 inputRoots.add(this.collectionManager.getImportDirectory());
32 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
33 }
34 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
35 this.collectionManager.getDatabase());
36 this.inputRoots = inputRoots;
37
38 // set up the output directory
39 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
40 File outputDirFile = new File(this.outputDir);
41 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
42 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
43 System.exit(1);
44 }
45 System.out.println("Output directory set to " + this.outputDir);
46
47 this.archiveDir = this.collectionManager.getArchiveDirectory();
48
49 // TODO: initialise the expanders here...
50
51 // initialise any recognisers
52 this.recogniserManager = new RecogniserManager();
53 RecogniserInterface recogniser = new HTMLRecogniser(docList);
54 recogniserManager.addRecogniser(recogniser);
55 recogniserManager.addRecogniser(new IndexRecogniser(docList));
56 recogniser = new TextRecogniser(docList);
57 recogniserManager.addRecogniser(recogniser);
58 recogniser = new MetadataRecogniser(docList);
59 recogniserManager.addRecogniser(recogniser);
60
61 // set up the extractors
62 this.extractorManager = new ExtractorManager(this.docList);
63 this.extractorManager.addExtractor(new IndexExtractor());
64 this.extractorManager.addExtractor(new MetaXMLExtractor());
65
66 // TODO: set up the enrichers...
67
68 // set up the classifiers
69 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
70
71 // set up the indexers
72 this.indexerManager = new IndexerManager(this.docList);
73
74 // configure the collection
75 this.collectionManager.configureCollection(this);
76
77 // prepare a file crawler on the etc directory, and a crawl observer to respond to
78 // the appropriate files.
79 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
80 new CollectionCrawlObserver(this));
81
82 fileCrawler.crawl();
83 }
84
85 public String getLastBuildSequence()
86 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
87 }
88
89 public ClassifierManager getClassifierManager()
90 { return this.classifierManager;
91 }
92
93 public IndexerManager getIndexerManager()
94 { return this.indexerManager;
95 }
96
97 public void addIndexer(IndexerInterface iface)
98 { iface.configure(IndexerManager.outputDir, this.outputDir);
99 this.indexerManager.addIndexer(iface);
100 }
101
102 public void run()
103 { // Initialise collection manager - note start of build
104 this.collectionManager.startBuild();
105
106 // TODO: add expansion (e.g. Zip files)
107
108 for (int i = 0; i < this.inputRoots.size(); i ++)
109 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
110
111 fileCrawler.crawl();
112 }
113 this.extractorManager.extractDocuments();
114 this.classifierManager.classifyDocuments();
115 this.indexerManager.indexDocuments();
116
117 // TODO: validation phase
118
119 if (this.archiveDir != null) {
120 this.docList.writeDocuments(new File(this.archiveDir));
121 // this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
122 }
123
124 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
125
126 // TODO: write out collection configuration file (service lists, etc.)
127
128 this.collectionManager.endBuild();
129 }
130
131 public void addInputDirectory(String inputDir)
132 { this.inputRoots.add(inputDir);
133 }
134}
135
136
Note: See TracBrowser for help on using the repository browser.