Changeset 8489 for trunk/gsdl3/src/java
- Timestamp: 2004-11-09T11:18:02+13:00 (20 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java
r8455 r8489 12 12 public class BuildManager 13 13 { 14 CollectionManager collectionManager;15 RecogniserManager recogniserManager;16 IndexerManager indexerManager;17 ClassifierManager classifierManager;18 ExtractorManager extractorManager;19 DocumentList docList;20 List inputRoots;21 String outputDir;22 String archiveDir;14 CollectionManager collectionManager; 15 RecogniserManager recogniserManager; 16 IndexerManager indexerManager; 17 ClassifierManager classifierManager; 18 ExtractorManager extractorManager; 19 DocumentList docList; 20 List inputRoots; 21 String outputDir; 22 String archiveDir; 23 23 24 public BuildManager(List inputRoots, String site, String collection, String outputDir) 25 { 26 // get the collection configuration information 27 this.collectionManager = new CollectionManager(site, collection); 28 this.collectionManager.setBuildManager(this); 29 // set up the list of input directories 30 if (inputRoots.size() == 0) { 31 inputRoots.add(this.collectionManager.getImportDirectory()); 32 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory()); 24 public BuildManager(List inputRoots, String site, String collection, String outputDir) 25 { 26 // get the collection configuration information 27 this.collectionManager = new CollectionManager(site, collection); 28 this.collectionManager.setBuildManager(this); 29 // set up the list of input directories 30 if (inputRoots.size() == 0) { 31 inputRoots.add(this.collectionManager.getImportDirectory()); 32 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory()); 33 } 34 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager), 35 this.collectionManager.getDatabase()); 36 this.inputRoots = inputRoots; 37 38 // set up the output directory 39 this.outputDir = outputDir != null ? 
outputDir : this.collectionManager.getBuildDirectory(); 40 File outputDirFile = new File(this.outputDir); 41 if (!outputDirFile.exists() || !outputDirFile.isDirectory()) { 42 System.err.println("Unable to open directory " + this.outputDir + " for writing"); 43 System.exit(1); 44 } 45 System.out.println("Output directory set to " + this.outputDir); 46 47 this.archiveDir = this.collectionManager.getArchiveDirectory(); 48 49 // TODO: initialise the expanders here... 50 51 // initialise any recognisers 52 this.recogniserManager = new RecogniserManager(); 53 RecogniserInterface recogniser = new HTMLRecogniser(docList); 54 recogniserManager.addRecogniser(recogniser); 55 recogniserManager.addRecogniser(new IndexRecogniser(docList)); 56 recogniser = new TextRecogniser(docList); 57 recogniserManager.addRecogniser(recogniser); 58 recogniser = new JPEGRecogniser(docList); 59 recogniserManager.addRecogniser(recogniser); 60 /* recogniser = new ExtXMLRecogniser(docList); 61 recogniserManager.addRecogniser(recogniser);*/ 62 recogniser = new MetadataRecogniser(docList); 63 recogniserManager.addRecogniser(recogniser); 64 recogniser = new GS2METSRecogniser(docList); 65 recogniserManager.addRecogniser(recogniser); 66 67 // set up the extractors 68 this.extractorManager = new ExtractorManager(this.docList); 69 this.extractorManager.addExtractor(new IndexExtractor()); 70 /* this.extractorManager.addExtractor(new ExtXMLExtractor());*/ 71 this.extractorManager.addExtractor(new MetaXMLExtractor()); 72 73 // TODO: set up the enrichers... 74 75 // set up the classifiers 76 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase()); 77 78 // set up the indexers 79 this.indexerManager = new IndexerManager(this.docList); 80 81 // configure the collection 82 this.collectionManager.configureCollection(); 83 84 // prepare a file crawler on the etc directory, and a crawl observer to respond to 85 // the appropriate files. 
86 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()), 87 new CollectionCrawlObserver(this)); 88 89 fileCrawler.crawl(); 33 90 } 34 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),35 this.collectionManager.getDatabase());36 this.inputRoots = inputRoots;37 38 // set up the output directory39 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();40 File outputDirFile = new File(this.outputDir);41 if (!outputDirFile.exists() || !outputDirFile.isDirectory()) {42 System.err.println("Unable to open directory " + this.outputDir + " for writing");43 System.exit(1);44 }45 System.out.println("Output directory set to " + this.outputDir);46 47 this.archiveDir = this.collectionManager.getArchiveDirectory();48 49 // TODO: initialise the expanders here...50 51 // initialise any recognisers52 this.recogniserManager = new RecogniserManager();53 RecogniserInterface recogniser = new HTMLRecogniser(docList);54 recogniserManager.addRecogniser(recogniser);55 recogniserManager.addRecogniser(new IndexRecogniser(docList));56 recogniser = new TextRecogniser(docList);57 recogniserManager.addRecogniser(recogniser);58 recogniser = new JPEGRecogniser(docList);59 recogniserManager.addRecogniser(recogniser);60 /* recogniser = new ExtXMLRecogniser(docList);61 recogniserManager.addRecogniser(recogniser);*/62 recogniser = new MetadataRecogniser(docList);63 recogniserManager.addRecogniser(recogniser);64 recogniser = new GS2METSRecogniser(docList);65 recogniserManager.addRecogniser(recogniser);66 67 // set up the extractors68 this.extractorManager = new ExtractorManager(this.docList);69 this.extractorManager.addExtractor(new IndexExtractor());70 /* this.extractorManager.addExtractor(new ExtXMLExtractor());*/71 this.extractorManager.addExtractor(new MetaXMLExtractor());72 73 // TODO: set up the enrichers...74 75 // set up the classifiers76 this.classifierManager = new 
ClassifierManager(this.docList, collectionManager.getDatabase());77 78 // set up the indexers79 this.indexerManager = new IndexerManager(this.docList);80 81 // configure the collection82 this.collectionManager.configureCollection();83 84 // prepare a file crawler on the etc directory, and a crawl observer to respond to85 // the appropriate files.86 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),87 new CollectionCrawlObserver(this));88 89 fileCrawler.crawl();90 }91 91 92 public String getLastBuildSequence() 93 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence"); 94 } 95 96 public ClassifierManager getClassifierManager() 97 { return this.classifierManager; 98 } 99 100 public IndexerManager getIndexerManager() 101 { return this.indexerManager; 102 } 103 104 public void addIndexer(IndexerInterface iface) 105 { iface.configure(IndexerManager.outputDir, this.outputDir); 106 this.indexerManager.addIndexer(iface); 107 } 108 109 public void run() 110 { // Initialise collection manager - note start of build 111 this.collectionManager.startBuild(); 112 113 // TODO: add expansion (e.g. Zip files) 114 115 // Crawl the file tree - will recognise documents 116 for (int i = 0; i < this.inputRoots.size(); i ++) 117 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager); 118 119 fileCrawler.crawl(); 92 public String getLastBuildSequence() 93 { 94 return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence"); 120 95 } 121 96 122 // Extract phase, etc.123 this.extractorManager.extractDocuments();124 this.classifierManager.classifyDocuments();125 this.indexerManager.indexDocuments();97 public ClassifierManager getClassifierManager() 98 { 99 return this.classifierManager; 100 } 126 101 127 // Timestamp management - update all timestamps on modified dates...128 //129 // This should only occur at the end of building in case the build is cancelled... 
130 docList.updateTimestamps(this.collectionManager.getBuildDate());102 public IndexerManager getIndexerManager() 103 { 104 return this.indexerManager; 105 } 131 106 132 // TODO: validation phase 107 public void addIndexer(IndexerInterface iface) 108 { 109 iface.configure(IndexerManager.outputDir, this.outputDir); 110 this.indexerManager.addIndexer(iface); 111 } 133 112 134 if (this.archiveDir != null) { 135 this.docList.writeDocuments(new File(this.archiveDir)); 136 // this.docList.writeSQLDocuments(this.collectionManager.getDatabase()); 113 public void run() 114 { 115 // Initialise collection manager - note start of build 116 this.collectionManager.startBuild(); 117 118 // TODO: add expansion (e.g. Zip files) 119 120 // Crawl the file tree - will recognise documents 121 for (int i = 0; i < this.inputRoots.size(); i ++) { 122 FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager); 123 124 fileCrawler.crawl(); 125 } 126 127 // Extract phase, etc. 128 this.extractorManager.extractDocuments(); 129 this.classifierManager.classifyDocuments(); 130 this.indexerManager.indexDocuments(); 131 132 // Timestamp management - update all timestamps on modified dates... 133 // 134 // This should only occur at the end of building in case the build is cancelled... 135 docList.updateTimestamps(this.collectionManager.getBuildDate()); 136 137 // TODO: validation phase 138 139 if (this.archiveDir != null) { 140 this.docList.writeDocuments(new File(this.archiveDir)); 141 //this.docList.writeSQLDocuments(this.collectionManager.getDatabase()); 142 } 143 144 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount())); 145 146 // TODO: write out collection configuration file (service lists, etc.) 
147 148 this.collectionManager.endBuild(); 137 149 } 138 139 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));140 141 // TODO: write out collection configuration file (service lists, etc.)142 143 this.collectionManager.endBuild();144 }145 150 146 public void addInputDirectory(String inputDir) 147 { this.inputRoots.add(inputDir); 148 } 151 public void addInputDirectory(String inputDir) 152 { 153 this.inputRoots.add(inputDir); 154 } 149 155 } 150 156
Note: See TracChangeset for help on using the changeset viewer.