Changeset 8490


Ignore:
Timestamp:
2004-11-09T11:22:56+13:00 (19 years ago)
Author:
kjdon
Message:

Slightly reordered the code and removed the hard-coded addition of recognisers, because this is now done from the config file, e.g. <recognise><docType name='HTML'/>...</recognise>

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java

    r8489 r8490  
    2121    String            outputDir;
    2222    String            archiveDir;
    23    
    24     public BuildManager(List inputRoots, String site, String collection, String outputDir)
     23   
     24    public BuildManager(List inputRoots, String site, String collection, String outputDir) 
    2525    {
    26     // get the collection configuration information
     26    // set up the collection manager
    2727    this.collectionManager = new CollectionManager(site, collection);
    2828    this.collectionManager.setBuildManager(this);
     29   
    2930    // set up the list of input directories
    3031    if (inputRoots.size() == 0) {
     
    3233        System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
    3334    }
    34     this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
    35                     this.collectionManager.getDatabase());
    3635    this.inputRoots = inputRoots;
    37    
    38     // set up the output directory
     36   
     37    // set up the output directories (building and archive)
    3938    this.outputDir  = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
    4039    File outputDirFile = new File(this.outputDir);
    41     if (!outputDirFile.exists() || !outputDirFile.isDirectory()) {
     40    if (!outputDirFile.isDirectory()) {
    4241        System.err.println("Unable to open directory " + this.outputDir + " for writing");
    4342        System.exit(1);
    4443    }
    4544    System.out.println("Output directory set to " + this.outputDir);
    46 
     45   
     46    // set up the archive directory - this will definitely exist
    4747    this.archiveDir = this.collectionManager.getArchiveDirectory();
    4848
    49     // TODO: initialise the expanders here...
    50 
    51     // initialise any recognisers
    52     this.recogniserManager = new RecogniserManager();
    53     RecogniserInterface recogniser = new HTMLRecogniser(docList);
    54     recogniserManager.addRecogniser(recogniser);
    55     recogniserManager.addRecogniser(new IndexRecogniser(docList));
    56     recogniser = new TextRecogniser(docList);
    57     recogniserManager.addRecogniser(recogniser);
    58     recogniser = new JPEGRecogniser(docList);
    59     recogniserManager.addRecogniser(recogniser);
    60     /*    recogniser = new ExtXMLRecogniser(docList);
    61           recogniserManager.addRecogniser(recogniser);*/
    62     recogniser = new MetadataRecogniser(docList);
    63     recogniserManager.addRecogniser(recogniser);
    64     recogniser = new GS2METSRecogniser(docList);
    65     recogniserManager.addRecogniser(recogniser);
    66 
    67     // set up the extractors
     49    // the list that documents get added to when they are recognised
     50    this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
     51                    this.collectionManager.getDatabase());
     52   
     53    // set up the various managers
     54    this.recogniserManager = new RecogniserManager(this.docList);
    6855    this.extractorManager  = new ExtractorManager(this.docList);
     56    this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
     57    this.indexerManager = new IndexerManager(this.docList);
     58   
     59    // configure the collection - this will add classifiers, indexers, recognisers to the various managers.
     60    this.collectionManager.configureCollection();
     61   
     62    // add in expanders by hand cos they are not in the config file yet
     63    // TODO
     64    // add in extractors by hand cos they are not in the config file yet
    6965    this.extractorManager.addExtractor(new IndexExtractor());
    70     /*    this.extractorManager.addExtractor(new ExtXMLExtractor());*/
    7166    this.extractorManager.addExtractor(new MetaXMLExtractor());
    72    
    73     // TODO: set up the enrichers...
    74    
    75     // set up the classifiers
    76     this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
    77        
    78     // set up the indexers
    79     this.indexerManager = new IndexerManager(this.docList);
    80 
    81     // configure the collection
    82     this.collectionManager.configureCollection();
    83 
     67   
     68    // add in enrichers by hand cos they are not in the config file yet
     69    // TODO
     70       
    8471    // prepare a file crawler on the etc directory, and a crawl observer to respond to
    8572    // the appropriate files.
     
    10592    }
    10693
     94    public RecogniserManager getRecogniserManager()
     95    {
     96    return this.recogniserManager;
     97    }
     98   
    10799    public void addIndexer(IndexerInterface iface)
    108100    {
     
    115107    // Initialise collection manager - note start of build
    116108    this.collectionManager.startBuild();
    117 
     109   
    118110    // TODO: add expansion (e.g. Zip files)
    119111   
     
    121113    for (int i = 0; i < this.inputRoots.size(); i ++) {
    122114        FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
    123        
     115
    124116        fileCrawler.crawl();
    125117    }
     
    134126    // This should only occur at the end of building in case the build is cancelled...
    135127    docList.updateTimestamps(this.collectionManager.getBuildDate());
    136 
     128   
    137129    // TODO: validation phase
    138 
     130   
    139131    if (this.archiveDir != null) {
    140132        this.docList.writeDocuments(new File(this.archiveDir));
    141133        //this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
    142134    }
    143    
     135   
    144136    this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
    145137   
     
    150142 
    151143    public void addInputDirectory(String inputDir)
    152     { 
     144    {
    153145    this.inputRoots.add(inputDir);
    154146    }
Note: See TracChangeset for help on using the changeset viewer.