source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java@ 6897

Last change on this file since 6897 was 6897, checked in by kjdon, 20 years ago

added a new method: addServiceDescription

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1package org.greenstone.gsdl3.gs3build.indexers;
2
3import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
4import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
5
6import java.io.InputStream;
7import java.io.OutputStream;
8import java.io.IOException;
9
10public class MGPPIndexer extends AbstractIndexer
11{
12 int pass;
13 String name;
14 boolean firstDocument;
15 String outputDirectory;
16 String outputStem;
17 InputStream indexerFeedback;
18 InputStream indexerErrors;
19 OutputStream indexerTextfeed;
20 Process mgpp_passes;
21 static final String documentSeparator = "<Document>";
22
23 public static final String MGPP_INDEX_TYPE = "mgpp";
24
25 public MGPPIndexer(String name)
26 { this.name = name;
27 }
28
29 public String getName()
30 { return this.name;
31 }
32
33 /**
34 * The output directory should be (collection)/building/text/ for
35 * normal Greenstone builds
36 */
37 public boolean configure(String label, String value)
38 {
39 if (label.equals("outputDir")) {
40 this.outputDirectory = value;
41 this.outputStem = value + "/index";
42 }
43 this.pass = 0;
44 return true;
45 }
46
47 public String getIndexType()
48 { return MGPP_INDEX_TYPE;
49 }
50
51 public boolean addIndex(String name, String level, String field)
52 { return true;
53 }
54
55 /**
56 * Index a single document; the document interface can be used to extract individual
57 * metadata items etc. as required or desired and index those instead or as well as
58 * the body text of the document.
59 */
60 public boolean indexDocument(DocumentID docID, DocumentInterface document)
61 {
62 if (!this.firstDocument)
63 { // Send a '<document>' before the document itself
64 try {
65 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length);
66 }
67 catch (IOException ex)
68 { System.out.println("Bad output on end of document" + ex);
69 ex.printStackTrace();
70 return false;
71 }
72 }
73 String docText = document.getDocumentText();
74
75 byte [] bytes = docText.getBytes();
76 int pos = 0, end = bytes.length;
77
78 try {
79 while (pos < end) {
80 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
81 pos = pos + 512;
82
83 try {
84 while (this.indexerFeedback.available() > 0)
85 { byte b[] = new byte[this.indexerFeedback.available()];
86 System.out.println("Feedback of " + this.indexerFeedback.available());
87 this.indexerFeedback.read(b);
88 System.out.println(b);
89 }
90 }
91 catch (IOException ex)
92 {
93 }
94
95
96 try {
97 while (this.indexerErrors.available() > 0)
98 { byte b[] = new byte[this.indexerErrors.available()];
99 System.out.println("Feedback of " + this.indexerErrors.available());
100 this.indexerErrors.read(b);
101 System.out.println(new String(b));
102 }
103 }
104 catch (IOException ex)
105 {
106 }
107 }
108 }
109 catch (IOException ex)
110 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
111 ex.printStackTrace();
112 return false;
113 }
114 this.firstDocument = false;
115
116 try {
117 while (this.indexerErrors.available() > 0)
118 { char c = (char) this.indexerErrors.read();
119 System.out.println(c);
120 }
121 while (this.indexerFeedback.available() > 0)
122 { byte b[] = new byte[this.indexerFeedback.available()];
123 System.out.println("Feedback of " + this.indexerFeedback.available());
124 this.indexerFeedback.read(b);
125 }
126 }
127 catch (IOException ex)
128 {
129 }
130 return true;
131 }
132
133 /**
134 * Initialise the pass: open required files, check status
135 */
136 public boolean startPass(int passNumber)
137 { this.pass = passNumber;
138 this.firstDocument = true;
139
140 try {
141 switch (this.pass) {
142 case 0:
143 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem + " -T1");
144 break;
145
146 case 1:
147 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -T2");
148 break;
149
150 case 2:
151 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I1");
152 break;
153
154 case 3:
155 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem);
156 p.waitFor();
157
158 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I2");
159 break;
160 }
161
162 this.indexerFeedback = mgpp_passes.getInputStream();
163 this.indexerErrors = mgpp_passes.getErrorStream();
164 this.indexerTextfeed = mgpp_passes.getOutputStream();
165 }
166 catch (IOException ex)
167 { System.out.println(ex);
168 ex.printStackTrace();
169 return false;
170 }
171 catch (InterruptedException ex)
172 { System.out.println(ex);
173 ex.printStackTrace();
174 return false;
175 }
176 System.out.println("Pass " + this.pass);
177 return true;
178 }
179
180 /**
181 * Complete a pass - reset file counters, close files, etc.
182 */
183 public boolean endPass(int passNumber)
184 { // TODO: end pass
185 Process p;
186
187 try {
188 this.indexerTextfeed.write((char) 2);
189 this.indexerTextfeed.write(4);
190 while (this.indexerErrors.available() > 0)
191 { char c = (char) this.indexerErrors.read();
192 System.out.print(c);
193 }
194 while (this.indexerFeedback.available() > 0)
195 { byte b[] = new byte[this.indexerFeedback.available()];
196 System.out.print("Feedback of " + this.indexerFeedback.available());
197 this.indexerFeedback.read(b);
198 }
199
200 this.indexerTextfeed.close();
201 Thread.sleep(1000);
202 this.mgpp_passes.waitFor();
203 }
204 catch (IOException ex)
205 { System.out.println(ex);
206 }
207 catch (InterruptedException ex)
208 { System.out.println(ex);
209 }
210 System.out.println("Completed with " + this.mgpp_passes.exitValue());
211
212 try {
213 switch (this.pass)
214 {
215 case 0:
216 System.out.println("Compressing dictionary");
217 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120");
218 p.waitFor();
219 System.out.println(p.exitValue());
220 break;
221
222 case 3:
223 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem);
224 p.waitFor();
225 System.out.println(p.exitValue());
226
227 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem);
228 p.waitFor();
229 System.out.println(p.exitValue());
230
231 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory);
232 p.waitFor();
233 System.out.println(p.exitValue());
234 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory);
235 p.waitFor();
236 System.out.println(p.exitValue());
237 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory);
238 p.waitFor();
239 System.out.println(p.exitValue());
240 break;
241 }
242 }
243 catch (IOException ex)
244 { System.out.println(ex);
245 ex.printStackTrace();
246 return false;
247 }
248 catch (InterruptedException ex)
249 { System.out.println(ex);
250 ex.printStackTrace();
251 return false;
252 }
253 return true;
254 }
255
256 /**
257 * Do any tidying up
258 */
259 public void tidyup()
260 {
261 }
262
263 /**
264 * Return the number of passes required for this index.
265 */
266 public int getNumberOfPasses()
267 { return 4;
268 }
269
270 public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) {
271 System.out.println("adding service description, MGPPIndexer");
272 return true;
273 }
274
275}
Note: See TracBrowser for help on using the repository browser.