Changeset 8489


Ignore:
Timestamp:
2004-11-09T11:18:02+13:00 (19 years ago)
Author:
kjdon
Message:

formatting changes in preparation for adding my changes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java

    r8455 r8489  
    12 12 public class BuildManager
    13 13 {
    14   CollectionManager collectionManager;
    15   RecogniserManager recogniserManager;
    16   IndexerManager    indexerManager;
    17   ClassifierManager classifierManager;
    18   ExtractorManager  extractorManager;
    19   DocumentList      docList;
    20   List              inputRoots;
    21   String            outputDir;
    22   String            archiveDir;
     14    CollectionManager collectionManager;
     15    RecogniserManager recogniserManager;
     16    IndexerManager    indexerManager;
     17    ClassifierManager classifierManager;
     18    ExtractorManager  extractorManager;
     19    DocumentList      docList;
     20    List              inputRoots;
     21    String            outputDir;
     22    String            archiveDir;
    23 23   
    24   public BuildManager(List inputRoots, String site, String collection, String outputDir)
    25   {
    26     // get the collection configuration information
    27     this.collectionManager = new CollectionManager(site, collection);
    28     this.collectionManager.setBuildManager(this);
    29     // set up the list of input directories
    30     if (inputRoots.size() == 0) {
    31       inputRoots.add(this.collectionManager.getImportDirectory());
    32       System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
     24    public BuildManager(List inputRoots, String site, String collection, String outputDir)
     25    {
     26    // get the collection configuration information
     27    this.collectionManager = new CollectionManager(site, collection);
     28    this.collectionManager.setBuildManager(this);
     29    // set up the list of input directories
     30    if (inputRoots.size() == 0) {
     31        inputRoots.add(this.collectionManager.getImportDirectory());
     32        System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
     33    }
     34    this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
     35                    this.collectionManager.getDatabase());
     36    this.inputRoots = inputRoots;
     37   
     38    // set up the output directory
     39    this.outputDir  = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
     40    File outputDirFile = new File(this.outputDir);
     41    if (!outputDirFile.exists() || !outputDirFile.isDirectory()) {
     42        System.err.println("Unable to open directory " + this.outputDir + " for writing");
     43        System.exit(1);
     44    }
     45    System.out.println("Output directory set to " + this.outputDir);
     46
     47    this.archiveDir = this.collectionManager.getArchiveDirectory();
     48
     49    // TODO: initialise the expanders here...
     50
     51    // initialise any recognisers
     52    this.recogniserManager = new RecogniserManager();
     53    RecogniserInterface recogniser = new HTMLRecogniser(docList);
     54    recogniserManager.addRecogniser(recogniser);
     55    recogniserManager.addRecogniser(new IndexRecogniser(docList));
     56    recogniser = new TextRecogniser(docList);
     57    recogniserManager.addRecogniser(recogniser);
     58    recogniser = new JPEGRecogniser(docList);
     59    recogniserManager.addRecogniser(recogniser);
     60    /*    recogniser = new ExtXMLRecogniser(docList);
     61          recogniserManager.addRecogniser(recogniser);*/
     62    recogniser = new MetadataRecogniser(docList);
     63    recogniserManager.addRecogniser(recogniser);
     64    recogniser = new GS2METSRecogniser(docList);
     65    recogniserManager.addRecogniser(recogniser);
     66
     67    // set up the extractors
     68    this.extractorManager  = new ExtractorManager(this.docList);
     69    this.extractorManager.addExtractor(new IndexExtractor());
     70    /*    this.extractorManager.addExtractor(new ExtXMLExtractor());*/
     71    this.extractorManager.addExtractor(new MetaXMLExtractor());
     72   
     73    // TODO: set up the enrichers...
     74   
     75    // set up the classifiers
     76    this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
     77       
     78    // set up the indexers
     79    this.indexerManager = new IndexerManager(this.docList);
     80
     81    // configure the collection
     82    this.collectionManager.configureCollection();
     83
     84    // prepare a file crawler on the etc directory, and a crawl observer to respond to
     85    // the appropriate files.
     86    FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
     87                          new CollectionCrawlObserver(this));
     88
     89    fileCrawler.crawl();   
    33 90    }
    34     this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
    35                     this.collectionManager.getDatabase());
    36     this.inputRoots = inputRoots;
    37    
    38     // set up the output directory
    39     this.outputDir  = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
    40     File outputDirFile = new File(this.outputDir);
    41     if (!outputDirFile.exists() || !outputDirFile.isDirectory()) {
    42     System.err.println("Unable to open directory " + this.outputDir + " for writing");
    43     System.exit(1);
    44     }
    45     System.out.println("Output directory set to " + this.outputDir);
    46 
    47     this.archiveDir = this.collectionManager.getArchiveDirectory();
    48 
    49     // TODO: initialise the expanders here...
    50 
    51     // initialise any recognisers
    52     this.recogniserManager = new RecogniserManager();
    53     RecogniserInterface recogniser = new HTMLRecogniser(docList);
    54     recogniserManager.addRecogniser(recogniser);
    55     recogniserManager.addRecogniser(new IndexRecogniser(docList));
    56     recogniser = new TextRecogniser(docList);
    57     recogniserManager.addRecogniser(recogniser);
    58     recogniser = new JPEGRecogniser(docList);
    59     recogniserManager.addRecogniser(recogniser);
    60     /*    recogniser = new ExtXMLRecogniser(docList);
    61       recogniserManager.addRecogniser(recogniser);*/
    62     recogniser = new MetadataRecogniser(docList);
    63     recogniserManager.addRecogniser(recogniser);
    64     recogniser = new GS2METSRecogniser(docList);
    65     recogniserManager.addRecogniser(recogniser);
    66 
    67     // set up the extractors
    68     this.extractorManager  = new ExtractorManager(this.docList);
    69     this.extractorManager.addExtractor(new IndexExtractor());
    70     /*    this.extractorManager.addExtractor(new ExtXMLExtractor());*/
    71     this.extractorManager.addExtractor(new MetaXMLExtractor());
    72    
    73     // TODO: set up the enrichers...
    74    
    75     // set up the classifiers
    76     this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
    77        
    78     // set up the indexers
    79     this.indexerManager = new IndexerManager(this.docList);
    80 
    81     // configure the collection
    82     this.collectionManager.configureCollection();
    83 
    84     // prepare a file crawler on the etc directory, and a crawl observer to respond to
    85     // the appropriate files.
    86     FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
    87                           new CollectionCrawlObserver(this));
    88 
    89     fileCrawler.crawl();   
    90   }
    91 91 
    92   public String getLastBuildSequence()
    93   { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
    94   }
    95 
    96   public ClassifierManager getClassifierManager()
    97   { return this.classifierManager;
    98   }
    99 
    100   public IndexerManager getIndexerManager()
    101   { return this.indexerManager;
    102   }
    103 
    104   public void addIndexer(IndexerInterface iface)
    105   { iface.configure(IndexerManager.outputDir, this.outputDir);
    106     this.indexerManager.addIndexer(iface);
    107   }
    108 
    109   public void run()
    110   { // Initialise collection manager - note start of build
    111     this.collectionManager.startBuild();
    112 
    113     // TODO: add expansion (e.g. Zip files)
    114    
    115     // Crawl the file tree - will recognise documents
    116     for (int i = 0; i < this.inputRoots.size(); i ++)
    117     { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
    118 
    119       fileCrawler.crawl();
     92    public String getLastBuildSequence()
     93    {
     94    return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
    120 95    }
    121 96
    122     // Extract phase, etc.
    123     this.extractorManager.extractDocuments();
    124     this.classifierManager.classifyDocuments();
    125     this.indexerManager.indexDocuments();
     97    public ClassifierManager getClassifierManager()
     98    {
     99    return this.classifierManager;
     100    }
    126 101
    127     // Timestamp management - update all timestamps on modified dates...
    128     //
    129     // This should only occur at the end of building in case the build is cancelled...
    130     docList.updateTimestamps(this.collectionManager.getBuildDate());
     102    public IndexerManager getIndexerManager()
     103    {
     104    return this.indexerManager;
     105    }
    131 106
    132     // TODO: validation phase
     107    public void addIndexer(IndexerInterface iface)
     108    {
     109    iface.configure(IndexerManager.outputDir, this.outputDir);
     110    this.indexerManager.addIndexer(iface);
     111    }
    133 112
    134     if (this.archiveDir != null) {
    135       this.docList.writeDocuments(new File(this.archiveDir));
    136       //      this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
     113    public void run()
     114    {
     115    // Initialise collection manager - note start of build
     116    this.collectionManager.startBuild();
     117
     118    // TODO: add expansion (e.g. Zip files)
     119   
     120    // Crawl the file tree - will recognise documents
     121    for (int i = 0; i < this.inputRoots.size(); i ++) {
     122        FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
     123       
     124        fileCrawler.crawl();
     125    }
     126
     127    // Extract phase, etc.
     128    this.extractorManager.extractDocuments();
     129    this.classifierManager.classifyDocuments();
     130    this.indexerManager.indexDocuments();
     131
     132    // Timestamp management - update all timestamps on modified dates...
     133    //
     134    // This should only occur at the end of building in case the build is cancelled...
     135    docList.updateTimestamps(this.collectionManager.getBuildDate());
     136
     137    // TODO: validation phase
     138
     139    if (this.archiveDir != null) {
     140        this.docList.writeDocuments(new File(this.archiveDir));
     141        //this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
     142    }
     143   
     144    this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
     145   
     146    // TODO: write out collection configuration file (service lists, etc.)
     147   
     148    this.collectionManager.endBuild();
    137 149    }
    138    
    139     this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
    140    
    141     // TODO: write out collection configuration file (service lists, etc.)
    142    
    143     this.collectionManager.endBuild();
    144   }
    145 150 
    146   public void addInputDirectory(String inputDir)
    147   { this.inputRoots.add(inputDir);
    148   }
     151    public void addInputDirectory(String inputDir)
     152    {
     153    this.inputRoots.add(inputDir);
     154    }
    149 155 }
    150 156
Note: See TracChangeset for help on using the changeset viewer.