source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 8699

Last change on this file since 8699 was 8490, checked in by kjdon, 20 years ago

Slightly reordered the code and removed the hand-coded addition of recognisers, because this is now done from the config file: <recognise><docType name='HTML'/>...</recognise>

  • Property svn:keywords set to Author Date Id Revision
File size: 4.9 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6import org.greenstone.gsdl3.gs3build.classifier.*;
7import org.greenstone.gsdl3.gs3build.collection.*;
8
9import java.io.File;
10import java.util.List;
11
12public class BuildManager
13{
14 CollectionManager collectionManager;
15 RecogniserManager recogniserManager;
16 IndexerManager indexerManager;
17 ClassifierManager classifierManager;
18 ExtractorManager extractorManager;
19 DocumentList docList;
20 List inputRoots;
21 String outputDir;
22 String archiveDir;
23
24 public BuildManager(List inputRoots, String site, String collection, String outputDir)
25 {
26 // set up the collection manager
27 this.collectionManager = new CollectionManager(site, collection);
28 this.collectionManager.setBuildManager(this);
29
30 // set up the list of input directories
31 if (inputRoots.size() == 0) {
32 inputRoots.add(this.collectionManager.getImportDirectory());
33 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
34 }
35 this.inputRoots = inputRoots;
36
37 // set up the output directories (building and archive)
38 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
39 File outputDirFile = new File(this.outputDir);
40 if (!outputDirFile.isDirectory()) {
41 System.err.println("Unable to open directory " + this.outputDir + " for writing");
42 System.exit(1);
43 }
44 System.out.println("Output directory set to " + this.outputDir);
45
46 // set up the archive directory - this will definitely exist
47 this.archiveDir = this.collectionManager.getArchiveDirectory();
48
49 // the list that documents get added to when they are recognised
50 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
51 this.collectionManager.getDatabase());
52
53 // set up the various managers
54 this.recogniserManager = new RecogniserManager(this.docList);
55 this.extractorManager = new ExtractorManager(this.docList);
56 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
57 this.indexerManager = new IndexerManager(this.docList);
58
59 // configure the collection - this will add classifiers, indexers, recognisers to the various managers.
60 this.collectionManager.configureCollection();
61
62 // add in expanders by hand cos they are not in the config file yet
63 // TODO
64 // add in extractors by hand cos they are not in the config file yet
65 this.extractorManager.addExtractor(new IndexExtractor());
66 this.extractorManager.addExtractor(new MetaXMLExtractor());
67
68 // add in enrichers by hand cos they are not in the config file yet
69 // TODO
70
71 // prepare a file crawler on the etc directory, and a crawl observer to respond to
72 // the appropriate files.
73 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
74 new CollectionCrawlObserver(this));
75
76 fileCrawler.crawl();
77 }
78
79 public String getLastBuildSequence()
80 {
81 return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
82 }
83
84 public ClassifierManager getClassifierManager()
85 {
86 return this.classifierManager;
87 }
88
89 public IndexerManager getIndexerManager()
90 {
91 return this.indexerManager;
92 }
93
94 public RecogniserManager getRecogniserManager()
95 {
96 return this.recogniserManager;
97 }
98
99 public void addIndexer(IndexerInterface iface)
100 {
101 iface.configure(IndexerManager.outputDir, this.outputDir);
102 this.indexerManager.addIndexer(iface);
103 }
104
105 public void run()
106 {
107 // Initialise collection manager - note start of build
108 this.collectionManager.startBuild();
109
110 // TODO: add expansion (e.g. Zip files)
111
112 // Crawl the file tree - will recognise documents
113 for (int i = 0; i < this.inputRoots.size(); i ++) {
114 FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
115
116 fileCrawler.crawl();
117 }
118
119 // Extract phase, etc.
120 this.extractorManager.extractDocuments();
121 this.classifierManager.classifyDocuments();
122 this.indexerManager.indexDocuments();
123
124 // Timestamp management - update all timestamps on modified dates...
125 //
126 // This should only occur at the end of building in case the build is cancelled...
127 docList.updateTimestamps(this.collectionManager.getBuildDate());
128
129 // TODO: validation phase
130
131 if (this.archiveDir != null) {
132 this.docList.writeDocuments(new File(this.archiveDir));
133 //this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
134 }
135
136 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
137
138 // TODO: write out collection configuration file (service lists, etc.)
139
140 this.collectionManager.endBuild();
141 }
142
143 public void addInputDirectory(String inputDir)
144 {
145 this.inputRoots.add(inputDir);
146 }
147}
148
149
Note: See TracBrowser for help on using the repository browser.