source: branches/alerting-branch/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java@ 8416

Last change on this file since 8416 was 8416, checked in by schweer, 20 years ago

proof-of-concept implementation for detecting new documents

  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1package org.greenstone.gsdl3.gs3build;
2
3import org.greenstone.gsdl3.gs3build.doctypes.*;
4import org.greenstone.gsdl3.gs3build.indexers.*;
5import org.greenstone.gsdl3.gs3build.extractor.*;
6import org.greenstone.gsdl3.gs3build.classifier.*;
7import org.greenstone.gsdl3.gs3build.collection.*;
8import org.greenstone.gsdl3.gs3build.notifier.*;
9
10import java.io.File;
11import java.util.List;
12
13public class BuildManager
14{
15 CollectionManager collectionManager;
16 RecogniserManager recogniserManager;
17 IndexerManager indexerManager;
18 ClassifierManager classifierManager;
19 ExtractorManager extractorManager;
20 NotifierManager notifierManager;
21 DocumentList docList;
22 List inputRoots;
23 String outputDir;
24 String archiveDir;
25
26 public BuildManager(List inputRoots, String site, String collection, String outputDir)
27 {
28 // get the collection configuration information
29 this.collectionManager = new CollectionManager(site, collection);
30 this.collectionManager.setBuildManager(this);
31 // set up the list of input directories
32 if (inputRoots.size() == 0) {
33 inputRoots.add(this.collectionManager.getImportDirectory());
34 System.out.println("Adding collection import directory to input list " + this.collectionManager.getImportDirectory());
35 }
36 this.docList = new DocumentList(new DocumentIDFactory(this.collectionManager),
37 this.collectionManager.getDatabase());
38 this.inputRoots = inputRoots;
39
40 // set up the output directory
41 this.outputDir = outputDir != null ? outputDir : this.collectionManager.getBuildDirectory();
42 File outputDirFile = new File(this.outputDir);
43 if (!outputDirFile.exists() || !outputDirFile.isDirectory())
44 { System.err.println("Unable to open directory " + this.outputDir + " for writing");
45 System.exit(1);
46 }
47 System.out.println("Output directory set to " + this.outputDir);
48
49 this.archiveDir = this.collectionManager.getArchiveDirectory();
50
51 // TODO: initialise the expanders here...
52
53 // initialise any recognisers
54 this.recogniserManager = new RecogniserManager();
55 RecogniserInterface recogniser = new HTMLRecogniser(docList);
56 recogniserManager.addRecogniser(recogniser);
57 recogniserManager.addRecogniser(new IndexRecogniser(docList));
58 recogniser = new TextRecogniser(docList);
59 recogniserManager.addRecogniser(recogniser);
60 recogniser = new JPEGRecogniser(docList);
61 recogniserManager.addRecogniser(recogniser);
62 /* recogniser = new ExtXMLRecogniser(docList);
63 recogniserManager.addRecogniser(recogniser);*/
64 recogniser = new MetadataRecogniser(docList);
65 recogniserManager.addRecogniser(recogniser);
66
67 // set up the extractors
68 this.extractorManager = new ExtractorManager(this.docList);
69 this.extractorManager.addExtractor(new IndexExtractor());
70 /* this.extractorManager.addExtractor(new ExtXMLExtractor());*/
71 this.extractorManager.addExtractor(new MetaXMLExtractor());
72
73 // TODO: set up the enrichers...
74
75 // set up the classifiers
76 this.classifierManager = new ClassifierManager(this.docList, collectionManager.getDatabase());
77
78 // set up the indexers
79 this.indexerManager = new IndexerManager(this.docList);
80
81 this.notifierManager = new NotifierManager();
82
83 // configure the collection
84 this.collectionManager.configureCollection();
85
86 // prepare a file crawler on the etc directory, and a crawl observer to respond to
87 // the appropriate files.
88 FileCrawler fileCrawler = new FileCrawler(new File(this.collectionManager.getEtcDirectory()),
89 new CollectionCrawlObserver(this));
90
91 fileCrawler.crawl();
92 }
93
94 public String getLastBuildSequence()
95 { return this.collectionManager.getCollectionMetadata("gsdl3", "buildsequence");
96 }
97
98 public ClassifierManager getClassifierManager()
99 { return this.classifierManager;
100 }
101
102 public IndexerManager getIndexerManager()
103 { return this.indexerManager;
104 }
105
106 public void addIndexer(IndexerInterface iface)
107 { iface.configure(IndexerManager.outputDir, this.outputDir);
108 this.indexerManager.addIndexer(iface);
109 }
110
111 public void run()
112 { // Initialise collection manager - note start of build
113 this.collectionManager.startBuild();
114
115 // TODO: add expansion (e.g. Zip files)
116
117 // Crawl the file tree - will recognise documents
118 for (int i = 0; i < this.inputRoots.size(); i ++)
119 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager);
120
121 fileCrawler.crawl();
122 }
123
124 // Extract phase, etc.
125 this.extractorManager.extractDocuments();
126 this.classifierManager.classifyDocuments();
127 this.indexerManager.indexDocuments();
128 this.notifierManager.detectEvents(this.collectionManager.getDatabase(), this.collectionManager.getLastBuildDate());
129
130 // Timestamp management - update all timestamps on modified dates...
131 //
132 // This should only occur at the end of building in case the build is cancelled...
133 docList.updateTimestamps(this.collectionManager.getBuildDate());
134
135 // TODO: validation phase
136
137 if (this.archiveDir != null) {
138 this.docList.writeDocuments(new File(this.archiveDir));
139 // this.docList.writeSQLDocuments(this.collectionManager.getDatabase());
140 }
141
142 this.collectionManager.setCollectionMetadata("gsdl3", "documentCount", Integer.toString(this.docList.getCount()));
143
144 // TODO: write out collection configuration file (service lists, etc.)
145
146 this.collectionManager.endBuild();
147 }
148
149 public void addInputDirectory(String inputDir)
150 { this.inputRoots.add(inputDir);
151 }
152}
153
154
Note: See TracBrowser for help on using the repository browser.