package org.greenstone.gsdl3.gs3build; import org.greenstone.gsdl3.gs3build.doctypes.*; import org.greenstone.gsdl3.gs3build.indexers.*; import org.greenstone.gsdl3.gs3build.notifier.NotifierManager; import org.greenstone.gsdl3.gs3build.extractor.*; import org.greenstone.gsdl3.gs3build.classifier.*; import org.greenstone.gsdl3.gs3build.collection.*; import java.io.File; import java.util.List; public class BuildManager { CollectionManager collectionManager; RecogniserManager recogniserManager; IndexerManager indexerManager; ClassifierManager classifierManager; ExtractorManager extractorManager; NotifierManager notifierManager; DocumentList docList; List inputRoots; String outputDir; String archiveDir; public BuildManager(List inputRoots, String site, String collection, String outputDir) { // set up the collection manager this.collectionManager = new CollectionManager(site, collection); this.collectionManager.setBuildManager(this); // set up the list of input directories if (inputRoots.size() == 0) { inputRoots.add(this.collectionManager.getImportDirectory()); System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory()); } this.inputRoots = inputRoots; // set up the output directories (building and archive) this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory(); File outputDirFile = new File(this.outputDir); if (!outputDirFile.isDirectory()) { System.err.println("Unable to open directory " + this.outputDir + " for writing"); System.exit(1); } System.out.println("Output directory set to " + this.outputDir); // set up the archive directory - this will definitely exist this.archiveDir = this.collectionManager.getArchiveDirectory(); // the list that documents get added to when they are recognised this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager), this.collectionManager.getDatabase()); // set up the various managers this.recogniserManager = new RecogniserManager(this.docList); this.extractorManager = new ExtractorManager(this.docList); this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase()); this.indexerManager = new IndexerManager(this.docList); this.notifierManager = new NotifierManager(); // configure the collection - this will add classifiers, indexers, recognisers to the various managers. this.collectionManager.configureCollection(); // add in expanders by hand cos they are not in the config file yet // TODO // add in extractors by hand cos they are not in the config file yet this.extractorManager.addExtractor(new IndexExtractor()); this.extractorManager.addExtractor(new MetaXMLExtractor()); // add in enrichers by hand cos they are not in the config file yet // TODO // prepare a file crawler on the etc directory, and a crawl observer to respond to // the appropriate files. FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()), new CollectionCrawlObserver(this)); fileCrawler.crawl(); } public String getLastBuildSequence() { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence"); } public ClassifierManager getClassifierManager() { return this.classifierManager; } public IndexerManager getIndexerManager() { return this.indexerManager; } // TODO: validation phase public RecogniserManager getRecogniserManager() { return this.recogniserManager; } public void addIndexer(IndexerInterface iface) { iface.configure(IndexerManager.outputDir, this.outputDir); this.indexerManager.addIndexer(iface); } public void run() { // Initialise collection manager - note start of build this.collectionManager.startBuild(); // TODO: add expansion (e.g. Zip files) // Crawl the file tree - will recognise documents for (int i = 0; i < this.inputRoots.size(); i ++) { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager); fileCrawler.crawl(); } // Extract phase, etc. this.extractorManager.extractDocuments(); this.classifierManager.classifyDocuments(); this.indexerManager.indexDocuments(); // Timestamp management - update all timestamps on modified dates... // // This should only occur at the end of building in case the build is cancelled... docList.updateTimestamps(this.collectionManager.getBuildTimestamp()); // TODO: validation phase this.notifierManager.detectEvents(this.collectionManager); if (this.archiveDir != null) { this.docList.writeDocuments(new File(this.archiveDir)); //this.docList.writeSQLDocuments(this.collectionManager.getDatabase()); } this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount())); // TODO: write out collection configuration file (service lists, etc.) this.collectionManager.endBuild(); } public void addInputDirectory(String inputDir) { this.inputRoots.add(inputDir); } }