Changeset 8927 for trunk/gsdl3/src/java
- Timestamp:
- 2005-01-24T15:38:47+13:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java
r7476 r8927 8 8 import java.io.IOException; 9 9 10 import org.w3c.dom.Element; 11 10 12 public class MGPPIndexer extends AbstractIndexer 11 13 { 12 int pass; 13 int documentSeqNo; 14 String name; 15 boolean firstDocument; 16 String outputDirectory; 17 String outputStem; 18 String passExtra; 19 InputStream indexerFeedback; 20 InputStream indexerErrors; 21 OutputStream indexerTextfeed; 22 Process mgpp_passes; 23 static final String documentSeparator = "<Document>"; 24 static final String sectionSeparator = "<Section>"; 25 26 public static final String MGPP_INDEX_TYPE = "mgpp"; 27 28 public MGPPIndexer(String name) 29 { this.name = name; 30 this.passExtra = ""; 31 } 32 33 public String getName() 34 { return this.name; 35 } 36 37 /** 38 * The output directory should be (collection)/building/text/ for 39 * normal Greenstone builds 40 */ 41 public boolean configure(String label, String value) 42 { 43 if (label.equals("outputDir")) { 44 this.outputDirectory = value; 45 this.outputStem = value + "/index"; 46 } 47 this.pass = 0; 48 return true; 49 } 50 51 public String getIndexType() 52 { return MGPP_INDEX_TYPE; 53 } 54 55 public boolean addIndex(String name, String level, String field) 56 { 57 if (level == "doc_level") { 58 passExtra = " -J " + level; 59 } 60 else { 61 passExtra = " -K " + level; 62 } 63 return true; 64 } 65 66 /** 67 * Index a single document; the document interface can be used to extract individual 68 * metadata items etc. as required or desired and index those instead or as well as 69 * the body text of the document. 70 */ 71 public boolean indexDocument(DocumentID docID, DocumentInterface document) 72 { 73 if (this.pass == 0) { 74 document.removeAllMetadata("gsdl3", "mgppseqno"); 75 } 76 77 if (!this.firstDocument) 78 { // Send a '<document>' before the document itself 79 try { 80 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 81 } 82 catch (IOException ex) 83 { System.out.println("Bad output on end of document" + ex); 84 ex.printStackTrace(); 85 return false; 86 } 87 } 88 89 String docText = document.getDocumentText(); 90 int startSeqNo = this.documentSeqNo; 14 int pass; 15 int documentSeqNo; 16 String name; 17 boolean firstDocument; 18 String outputDirectory; 19 String outputStem; 20 String passExtra; 21 InputStream indexerFeedback; 22 InputStream indexerErrors; 23 OutputStream indexerTextfeed; 24 Process mgpp_passes; 91 25 92 byte [] bytes = docText.getBytes(); 93 int pos = 0, end = bytes.length; 26 static final String documentSeparator = "<Document>"; 27 static final String sectionSeparator = "<Section>"; 28 29 public static final String MGPP_INDEX_TYPE = "mgpp"; 30 public static final String INDEX_FILE_STEM = "index"; 31 32 public MGPPIndexer(String name) 33 { 34 this.name = name; 35 //this.passExtra = ""; 36 } 37 38 public String getName() 39 { 40 return this.name; 41 } 42 43 /** 44 * The output directory should be (collection)/building/text/ for 45 * normal Greenstone builds 46 */ 47 public boolean configure(String label, String value) 48 { 49 if (label.equals("outputDir")) { 50 this.outputDirectory = value; 51 this.outputStem = value + "/index"; 52 } 53 this.pass = 0; 54 return true; 55 } 56 57 public String getIndexType() 58 { 59 return MGPP_INDEX_TYPE; 60 } 61 62 public boolean addIndex(String name, String level, String field) 63 { 64 if (level == "doc_level") { 65 passExtra = " -J " + level; 66 } 67 else { 68 passExtra = " -K " + level; 69 } 70 return true; 71 } 72 73 /** 74 * Index a single document; the document interface can be used to extract individual 75 * metadata items etc. as required or desired and index those instead or as well as 76 * the body text of the document. 77 */ 78 public boolean indexDocument(DocumentID docID, DocumentInterface document) 79 { 80 if (this.pass == 0) { 81 document.removeAllMetadata("gsdl3", "mgppseqno"); 82 } 83 84 if (!this.firstDocument) { 85 // Send a '<document>' before the document itself 86 try { 87 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 88 } 89 catch (IOException ex) { 90 System.out.println("Bad output on end of document" + ex); 91 ex.printStackTrace(); 92 return false; 93 } 94 } 95 96 String docText = document.getDocumentText(); 97 int startSeqNo = this.documentSeqNo; 98 99 byte [] bytes = docText.getBytes(); 100 int pos = 0, end = bytes.length; 101 102 try { 103 while (pos < end) { 104 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos)); 105 pos = pos + 512; 106 107 try { 108 while (this.indexerFeedback.available() > 0) { 109 byte b[] = new byte[this.indexerFeedback.available()]; 110 System.out.println("Feedback of " + this.indexerFeedback.available()); 111 this.indexerFeedback.read(b); 112 System.out.println(b); 113 } 114 } 115 catch (IOException ex) { 116 117 } 118 119 120 try { 121 while (this.indexerErrors.available() > 0) { 122 byte b[] = new byte[this.indexerErrors.available()]; 123 System.out.println("Feedback of " + this.indexerErrors.available()); 124 this.indexerErrors.read(b); 125 System.out.println(new String(b)); 126 } 127 } 128 catch (IOException ex){ 129 130 } 131 } 132 } 133 catch (IOException ex) { 134 System.out.println("Bad output during document write " + ex + " " + pos + " " + end); 135 ex.printStackTrace(); 136 return false; 137 } 138 this.firstDocument = false; 139 140 if (this.pass == 0) { 141 document.addDocumentMetadata("gsdl3", "mgppseqno", "dtx."+Integer.toString(startSeqNo)); 142 } 143 this.documentSeqNo += 1; 144 145 try { 146 while (this.indexerErrors.available() > 0) { 147 char c = (char) this.indexerErrors.read(); 148 System.out.println(c); 149 } 150 while (this.indexerFeedback.available() > 0) { 151 byte b[] = new byte[this.indexerFeedback.available()]; 152 System.out.println("Feedback of " + this.indexerFeedback.available()); 153 this.indexerFeedback.read(b); 154 } 155 } 156 catch (IOException ex) { 157 158 } 159 return true; 160 } 161 162 /** 163 * Initialise the pass: open required files, check status 164 */ 165 public boolean startPass(int passNumber) 166 { 167 this.pass = passNumber; 168 this.firstDocument = true; 169 this.documentSeqNo = 1; 94 170 95 try { 96 while (pos < end) { 97 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos)); 98 pos = pos + 512; 99 171 try { 172 switch (this.pass) { 173 case 0: 174 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1"); 175 break; 176 177 case 1: 178 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2"); 179 break; 180 181 case 2: 182 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1"); 183 break; 184 185 case 3: 186 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 187 p.waitFor(); 188 189 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2"); 190 break; 191 } 192 193 this.indexerFeedback = mgpp_passes.getInputStream(); 194 this.indexerErrors = mgpp_passes.getErrorStream(); 195 this.indexerTextfeed = mgpp_passes.getOutputStream(); 196 } 197 catch (IOException ex) { 198 System.out.println(ex); 199 ex.printStackTrace(); 200 return false; 201 } 202 catch (InterruptedException ex) { 203 System.out.println(ex); 204 ex.printStackTrace(); 205 return false; 206 } 207 System.out.println("Pass " + this.pass); 208 return true; 209 } 210 211 /** 212 * Complete a pass - reset file counters, close files, etc. 213 */ 214 public boolean endPass(int passNumber) 215 { 216 // TODO: end pass 217 Process p; 218 100 219 try { 101 while (this.indexerFeedback.available() > 0) 102 { byte b[] = new byte[this.indexerFeedback.available()]; 103 System.out.println("Feedback of " + this.indexerFeedback.available()); 104 this.indexerFeedback.read(b); 105 System.out.println(b); 106 } 107 } 108 catch (IOException ex) 109 { 110 } 111 112 220 this.indexerTextfeed.write((char) 2); 221 this.indexerTextfeed.write(4); 222 while (this.indexerErrors.available() > 0) { 223 char c = (char) this.indexerErrors.read(); 224 System.out.print(c); 225 } 226 while (this.indexerFeedback.available() > 0) { 227 byte b[] = new byte[this.indexerFeedback.available()]; 228 System.out.print("Feedback of " + this.indexerFeedback.available()); 229 this.indexerFeedback.read(b); 230 } 231 232 this.indexerTextfeed.close(); 233 Thread.sleep(1000); 234 this.mgpp_passes.waitFor(); 235 } 236 catch (IOException ex) { 237 System.out.println(ex); 238 } 239 catch (InterruptedException ex) { 240 System.out.println(ex); 241 } 242 System.out.println("Completed with " + this.mgpp_passes.exitValue()); 243 113 244 try { 114 while (this.indexerErrors.available() > 0) 115 { byte b[] = new byte[this.indexerErrors.available()]; 116 System.out.println("Feedback of " + this.indexerErrors.available()); 117 this.indexerErrors.read(b); 118 System.out.println(new String(b)); 119 } 120 } 121 catch (IOException ex) 122 { 123 } 124 } 125 } 126 catch (IOException ex) 127 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end); 128 ex.printStackTrace(); 129 return false; 130 } 131 this.firstDocument = false; 132 133 if (this.pass == 0) { 134 document.addDocumentMetadata("gsdl3", "mgppseqno", "dtx."+Integer.toString(startSeqNo)); 135 } 136 this.documentSeqNo += 1; 137 138 try { 139 while (this.indexerErrors.available() > 0) 140 { char c = (char) this.indexerErrors.read(); 141 System.out.println(c); 142 } 143 while (this.indexerFeedback.available() > 0) 144 { byte b[] = new byte[this.indexerFeedback.available()]; 145 System.out.println("Feedback of " + this.indexerFeedback.available()); 146 this.indexerFeedback.read(b); 147 } 148 } 149 catch (IOException ex) 245 switch (this.pass) { 246 case 0: 247 System.out.println("Compressing dictionary"); 248 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120"); 249 p.waitFor(); 250 System.out.println(p.exitValue()); 251 break; 252 253 case 3: 254 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem); 255 p.waitFor(); 256 System.out.println(p.exitValue()); 257 258 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem); 259 p.waitFor(); 260 System.out.println(p.exitValue()); 261 262 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory); 263 p.waitFor(); 264 System.out.println(p.exitValue()); 265 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory); 266 p.waitFor(); 267 System.out.println(p.exitValue()); 268 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory); 269 p.waitFor(); 270 System.out.println(p.exitValue()); 271 break; 272 } 273 } 274 catch (IOException ex) { 275 System.out.println(ex); 276 ex.printStackTrace(); 277 return false; 278 } 279 catch (InterruptedException ex) { 280 System.out.println(ex); 281 ex.printStackTrace(); 282 return false; 283 } 284 return true; 285 } 286 287 /** 288 * Do any tidying up 289 */ 290 public void tidyup() 150 291 { 151 292 } 152 return true;153 }154 155 /**156 * Initialise the pass: open required files, check status157 */158 public boolean startPass(int passNumber)159 { this.pass = passNumber;160 this.firstDocument = true;161 this.documentSeqNo = 1;162 293 163 try { 164 switch (this.pass) { 165 case 0: 166 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1"); 167 break; 168 169 case 1: 170 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2"); 171 break; 172 173 case 2: 174 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1"); 175 break; 176 177 case 3: 178 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 179 p.waitFor(); 180 181 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2"); 182 break; 183 } 184 185 this.indexerFeedback = mgpp_passes.getInputStream(); 186 this.indexerErrors = mgpp_passes.getErrorStream(); 187 this.indexerTextfeed = mgpp_passes.getOutputStream(); 188 } 189 catch (IOException ex) 190 { System.out.println(ex); 191 ex.printStackTrace(); 192 return false; 193 } 194 catch (InterruptedException ex) 195 { System.out.println(ex); 196 ex.printStackTrace(); 197 return false; 198 } 199 System.out.println("Pass " + this.pass); 200 return true; 201 } 202 203 /** 204 * Complete a pass - reset file counters, close files, etc. 205 */ 206 public boolean endPass(int passNumber) 207 { // TODO: end pass 208 Process p; 209 210 try { 211 this.indexerTextfeed.write((char) 2); 212 this.indexerTextfeed.write(4); 213 while (this.indexerErrors.available() > 0) 214 { char c = (char) this.indexerErrors.read(); 215 System.out.print(c); 216 } 217 while (this.indexerFeedback.available() > 0) 218 { byte b[] = new byte[this.indexerFeedback.available()]; 219 System.out.print("Feedback of " + this.indexerFeedback.available()); 220 this.indexerFeedback.read(b); 221 } 222 223 this.indexerTextfeed.close(); 224 Thread.sleep(1000); 225 this.mgpp_passes.waitFor(); 226 } 227 catch (IOException ex) 228 { System.out.println(ex); 229 } 230 catch (InterruptedException ex) 231 { System.out.println(ex); 232 } 233 System.out.println("Completed with " + this.mgpp_passes.exitValue()); 234 235 try { 236 switch (this.pass) 237 { 238 case 0: 239 System.out.println("Compressing dictionary"); 240 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120"); 241 p.waitFor(); 242 System.out.println(p.exitValue()); 243 break; 244 245 case 3: 246 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem); 247 p.waitFor(); 248 System.out.println(p.exitValue()); 249 250 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem); 251 p.waitFor(); 252 System.out.println(p.exitValue()); 253 254 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory); 255 p.waitFor(); 256 System.out.println(p.exitValue()); 257 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory); 258 p.waitFor(); 259 System.out.println(p.exitValue()); 260 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory); 261 p.waitFor(); 262 System.out.println(p.exitValue()); 263 break; 264 } 265 } 266 catch (IOException ex) 267 { System.out.println(ex); 268 ex.printStackTrace(); 269 return false; 270 } 271 catch (InterruptedException ex) 272 { System.out.println(ex); 273 ex.printStackTrace(); 274 return false; 275 } 276 return true; 277 } 278 279 /** 280 * Do any tidying up 281 */ 282 public void tidyup() 283 { 284 } 285 286 /** 287 * Return the number of passes required for this index. 288 */ 289 public int getNumberOfPasses() 290 { return 4; 291 } 292 293 public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) { 294 System.out.println("adding service description, MGPPIndexer"); 295 return true; 296 } 294 /** 295 * Return the number of passes required for this index. 296 */ 297 public int getNumberOfPasses() 298 { 299 return 4; 300 } 301 302 public boolean addServiceDescriptions(Element service_rack_list) 303 { 304 System.out.println("adding service description, MGPPIndexer"); 305 return true; 306 } 297 307 }
Note:
See TracChangeset
for help on using the changeset viewer.