source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 8408

Last change on this file since 8408 was 8408, checked in by schweer, 20 years ago

George's changes to detect documents that are new or have changed since the last build process. (his CVS account currently doesn't work)

  • Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6import org.greenstone.gsdl3.gs3build.classifier.*;
7import org.greenstone.gsdl3.gs3build.collection.*;
8
9import java.io.File;
10import java.util.List;
11
12public class BuildManager
13{
14 CollectionManager collectionManager;
15 RecogniserManager recogniserManager;
16 IndexerManager indexerManager;
17 ClassifierManager classifierManager;
18 ExtractorManager extractorManager;
19 DocumentList docList;
20 List inputRoots;
21 String outputDir;
22 String archiveDir;
23
24 public BuildManager(List inputRoots, String site, String collection, String outputDir)
25 {
26 // get the collection configuration information
27 this.collectionManager = new CollectionManager(site, collection);
28 this.collectionManager.setBuildManager(this);
29 // set up the list of input directories
30 if (inputRoots.size() == 0) {
31 inputRoots.add(this.collectionManager.getImportDirectory());
32 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
33 }
34 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
35 this.collectionManager.getDatabase());
36 this.inputRoots = inputRoots;
37
38 // set up the output directory
39 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
40 File outputDirFile = new File(this.outputDir);
41 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
42 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
43 System.exit(1);
44 }
45 System.out.println("Output directory set to " + this.outputDir);
46
47 this.archiveDir = this.collectionManager.getArchiveDirectory();
48
49 // TODO: initialise the expanders here...
50
51 // initialise any recognisers
52 this.recogniserManager = new RecogniserManager();
53 RecogniserInterface recogniser = new HTMLRecogniser(docList);
54 recogniserManager.addRecogniser(recogniser);
55 recogniserManager.addRecogniser(new IndexRecogniser(docList));
56 recogniser = new TextRecogniser(docList);
57 recogniserManager.addRecogniser(recogniser);
58 recogniser = new JPEGRecogniser(docList);
59 recogniserManager.addRecogniser(recogniser);
60 /* recogniser = new ExtXMLRecogniser(docList);
61 recogniserManager.addRecogniser(recogniser);*/
62 recogniser = new MetadataRecogniser(docList);
63 recogniserManager.addRecogniser(recogniser);
64
65 // set up the extractors
66 this.extractorManager = new ExtractorManager(this.docList);
67 this.extractorManager.addExtractor(new IndexExtractor());
68 /* this.extractorManager.addExtractor(new ExtXMLExtractor());*/
69 this.extractorManager.addExtractor(new MetaXMLExtractor());
70
71 // TODO: set up the enrichers...
72
73 // set up the classifiers
74 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
75
76 // set up the indexers
77 this.indexerManager = new IndexerManager(this.docList);
78
79 // configure the collection
80 this.collectionManager.configureCollection();
81
82 // prepare a file crawler on the etc directory, and a crawl observer to respond to
83 // the appropriate files.
84 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
85 new CollectionCrawlObserver(this));
86
87 fileCrawler.crawl();
88 }
89
90 public String getLastBuildSequence()
91 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
92 }
93
94 public ClassifierManager getClassifierManager()
95 { return this.classifierManager;
96 }
97
98 public IndexerManager getIndexerManager()
99 { return this.indexerManager;
100 }
101
102 public void addIndexer(IndexerInterface iface)
103 { iface.configure(IndexerManager.outputDir, this.outputDir);
104 this.indexerManager.addIndexer(iface);
105 }
106
107 public void run()
108 { // Initialise collection manager - note start of build
109 this.collectionManager.startBuild();
110
111 // TODO: add expansion (e.g. Zip files)
112
113 // Crawl the file tree - will recognise documents
114 for (int i = 0; i < this.inputRoots.size(); i ++)
115 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
116
117 fileCrawler.crawl();
118 }
119
120 // Extract phase, etc.
121 this.extractorManager.extractDocuments();
122 this.classifierManager.classifyDocuments();
123 this.indexerManager.indexDocuments();
124
125 // Timestamp management - update all timestamps on modified dates...
126 //
127 // This should only occur at the end of building in case the build is cancelled...
128 docList.updateTimestamps(this.collectionManager.getBuildDate());
129
130 // TODO: validation phase
131
132 if (this.archiveDir != null) {
133 this.docList.writeDocuments(new File(this.archiveDir));
134 // this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
135 }
136
137 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
138
139 // TODO: write out collection configuration file (service lists, etc.)
140
141 this.collectionManager.endBuild();
142 }
143
144 public void addInputDirectory(String inputDir)
145 { this.inputRoots.add(inputDir);
146 }
147}
148
149
Note: See TracBrowser for help on using the repository browser.