source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java@ 6736

Last change on this file since 6736 was 6736, checked in by cs025, 20 years ago

Added factory method, abstract indexer. Also modified Manager and
Interface to support naming of indexers, and MG and MGPP altered
accordingly.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1package org.greenstone.gsdl3.gs3build.indexers;
2
3import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
4import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
5
6import java.io.InputStream;
7import java.io.OutputStream;
8import java.io.IOException;
9
10public class MGPPIndexer extends AbstractIndexer
11{
12 int pass;
13 String name;
14 boolean firstDocument;
15 String outputDirectory;
16 String outputStem;
17 InputStream indexerFeedback;
18 InputStream indexerErrors;
19 OutputStream indexerTextfeed;
20 Process mgpp_passes;
21 static final String documentSeparator = "<Document>";
22
23 public static final String MGPP_INDEX_TYPE = "mgpp";
24
25 public MGPPIndexer(String name)
26 { this.name = name;
27 }
28
29 public String getName()
30 { return this.name;
31 }
32
33 /**
34 * The output directory should be (collection)/building/text/ for
35 * normal Greenstone builds
36 */
37 public boolean configure(String label, String value)
38 {
39 if (label.equals("outputDir")) {
40 this.outputDirectory = value;
41 this.outputStem = value + "/index";
42 }
43 this.pass = 0;
44 return true;
45 }
46
47 public String getIndexType()
48 { return MGPP_INDEX_TYPE;
49 }
50
51 public boolean addIndex(String name, String level, String field)
52 { return true;
53 }
54
55 /**
56 * Index a single document; the document interface can be used to extract individual
57 * metadata items etc. as required or desired and index those instead or as well as
58 * the body text of the document.
59 */
60 public boolean indexDocument(DocumentID docID, DocumentInterface document)
61 {
62 if (!this.firstDocument)
63 { // Send a '<document>' before the document itself
64 try {
65 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length);
66 }
67 catch (IOException ex)
68 { System.out.println("Bad output on end of document" + ex);
69 ex.printStackTrace();
70 return false;
71 }
72 }
73 String docText = document.getDocumentText();
74
75 byte [] bytes = docText.getBytes();
76 int pos = 0, end = bytes.length;
77
78 try {
79 while (pos < end) {
80 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
81 pos = pos + 512;
82
83 try {
84 while (this.indexerFeedback.available() > 0)
85 { byte b[] = new byte[this.indexerFeedback.available()];
86 System.out.println("Feedback of " + this.indexerFeedback.available());
87 this.indexerFeedback.read(b);
88 System.out.println(b);
89 }
90 }
91 catch (IOException ex)
92 {
93 }
94
95
96 try {
97 while (this.indexerErrors.available() > 0)
98 { byte b[] = new byte[this.indexerErrors.available()];
99 System.out.println("Feedback of " + this.indexerErrors.available());
100 this.indexerErrors.read(b);
101 System.out.println(new String(b));
102 }
103 }
104 catch (IOException ex)
105 {
106 }
107 }
108 }
109 catch (IOException ex)
110 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
111 ex.printStackTrace();
112 return false;
113 }
114 this.firstDocument = false;
115
116 try {
117 while (this.indexerErrors.available() > 0)
118 { char c = (char) this.indexerErrors.read();
119 System.out.println(c);
120 }
121 while (this.indexerFeedback.available() > 0)
122 { byte b[] = new byte[this.indexerFeedback.available()];
123 System.out.println("Feedback of " + this.indexerFeedback.available());
124 this.indexerFeedback.read(b);
125 }
126 }
127 catch (IOException ex)
128 {
129 }
130 return true;
131 }
132
133 /**
134 * Initialise the pass: open required files, check status
135 */
136 public boolean startPass(int passNumber)
137 { this.pass = passNumber;
138 this.firstDocument = true;
139
140 try {
141 switch (this.pass) {
142 case 0:
143 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem + " -T1");
144 break;
145
146 case 1:
147 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -T2");
148 break;
149
150 case 2:
151 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I1");
152 break;
153
154 case 3:
155 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem);
156 p.waitFor();
157
158 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I2");
159 break;
160 }
161
162 this.indexerFeedback = mgpp_passes.getInputStream();
163 this.indexerErrors = mgpp_passes.getErrorStream();
164 this.indexerTextfeed = mgpp_passes.getOutputStream();
165 }
166 catch (IOException ex)
167 { System.out.println(ex);
168 ex.printStackTrace();
169 return false;
170 }
171 catch (InterruptedException ex)
172 { System.out.println(ex);
173 ex.printStackTrace();
174 return false;
175 }
176 System.out.println("Pass " + this.pass);
177 return true;
178 }
179
180 /**
181 * Complete a pass - reset file counters, close files, etc.
182 */
183 public boolean endPass(int passNumber)
184 { // TODO: end pass
185 Process p;
186
187 try {
188 this.indexerTextfeed.write((char) 2);
189 this.indexerTextfeed.write(4);
190 while (this.indexerErrors.available() > 0)
191 { char c = (char) this.indexerErrors.read();
192 System.out.print(c);
193 }
194 while (this.indexerFeedback.available() > 0)
195 { byte b[] = new byte[this.indexerFeedback.available()];
196 System.out.print("Feedback of " + this.indexerFeedback.available());
197 this.indexerFeedback.read(b);
198 }
199
200 this.indexerTextfeed.close();
201 Thread.sleep(1000);
202 this.mgpp_passes.waitFor();
203 }
204 catch (IOException ex)
205 { System.out.println(ex);
206 }
207 catch (InterruptedException ex)
208 { System.out.println(ex);
209 }
210 System.out.println("Completed with " + this.mgpp_passes.exitValue());
211
212 try {
213 switch (this.pass)
214 {
215 case 0:
216 System.out.println("Compressing dictionary");
217 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120");
218 p.waitFor();
219 System.out.println(p.exitValue());
220 break;
221
222 case 3:
223 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem);
224 p.waitFor();
225 System.out.println(p.exitValue());
226
227 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem);
228 p.waitFor();
229 System.out.println(p.exitValue());
230
231 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory);
232 p.waitFor();
233 System.out.println(p.exitValue());
234 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory);
235 p.waitFor();
236 System.out.println(p.exitValue());
237 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory);
238 p.waitFor();
239 System.out.println(p.exitValue());
240 break;
241 }
242 }
243 catch (IOException ex)
244 { System.out.println(ex);
245 ex.printStackTrace();
246 return false;
247 }
248 catch (InterruptedException ex)
249 { System.out.println(ex);
250 ex.printStackTrace();
251 return false;
252 }
253 return true;
254 }
255
256 /**
257 * Do any tidying up
258 */
259 public void tidyup()
260 {
261 }
262
263 /**
264 * Return the number of passes required for this index.
265 */
266 public int getNumberOfPasses()
267 { return 4;
268 }
269}
Note: See TracBrowser for help on using the repository browser.