source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 5798

Last change on this file since 5798 was 5798, checked in by cs025, 20 years ago

Adding gs3build code

  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5
6import java.io.File;
7import java.util.List;
8
9public class BuildManager
10{
11 CollectionManager collectionManager;
12 RecogniserManager recogniserManager;
13 IndexerManager indexerManager;
14 ExtractorManager extractorManager;
15 DocumentList docList;
16 List inputRoots;
17 String outputDir;
18
19 public BuildManager(List inputRoots, String collection, String outputDir)
20 {
21 // get the collection configuration information
22 this.collectionManager = new CollectionManager(collection);
23
24 // set up the list of input directories
25 if (inputRoots.size() == 0) {
26 inputRoots.add(this.collectionManager.getImportDirectory());
27 System.out.println("Adding collection import directory to input list");
28 }
29 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager));
30 this.inputRoots = inputRoots;
31
32 // set up the output directory
33 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
34 File outputDirFile = new File(this.outputDir);
35 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
36 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
37 System.exit(1);
38 }
39 System.out.println("Output directory set to " + this.outputDir);
40
41 // initialise any recognisers
42 this.recogniserManager = new RecogniserManager();
43 RecogniserInterface recogniser = new HTMLRecogniser(docList);
44 recogniserManager.addRecogniser(recogniser);
45 recogniser = new TextRecogniser(docList);
46 recogniserManager.addRecogniser(recogniser);
47
48 // set up the extractors
49 this.extractorManager = new ExtractorManager(this.docList);
50
51 // set up the enrichers
52
53 // set up the classifiers
54
55 // set up the indexers
56 this.indexerManager = new IndexerManager(this.docList);
57 IndexerInterface iface = new MGIndexer();
58 iface.configure(this.outputDir);
59 this.indexerManager.addIndexer(iface);
60 }
61
62 public String getLastBuildSequence()
63 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
64 }
65
66 public void run()
67 {
68 this.collectionManager.startBuild();
69
70 for (int i = 0; i < this.inputRoots.size(); i ++)
71 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
72
73 fileCrawler.crawl();
74 }
75 this.indexerManager.indexDocuments();
76
77 this.docList.writeDocuments(new File(outputDir));
78 this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
79
80 this.collectionManager.endBuild();
81 }
82
83 public void addInputDirectory(String inputDir)
84 { this.inputRoots.add(inputDir);
85 }
86}
Note: See TracBrowser for help on using the repository browser.