- Timestamp: 2004-01-06T11:46:04+13:00 (20 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java
r6283 r6349 10 10 public class MGPPIndexer implements IndexerInterface 11 11 { 12 13 14 15 16 17 18 19 20 21 22 23 24 12 int pass; 13 boolean firstDocument; 14 String outputDirectory; 15 String outputStem; 16 InputStream indexerFeedback; 17 InputStream indexerErrors; 18 OutputStream indexerTextfeed; 19 Process mgpp_passes; 20 static final String documentSeparator = "<Document>"; 21 22 public MGPPIndexer() 23 { 24 } 25 25 26 26 /** … … 38 38 } 39 39 40 /** 41 * Index a single document; the document interface can be used to extract individual 42 * metadata items etc. as required or desired and index those instead or as well as 43 * the body text of the document. 44 */ 45 public boolean indexDocument(DocumentID docID, DocumentInterface document) 46 { 47 if (!this.firstDocument) 48 { // Send a '<document>' before the document itself 49 try { 50 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 51 } 52 catch (IOException ex) 53 { System.out.println("Bad output on end of document" + ex); 54 ex.printStackTrace(); 55 return false; 56 } 57 } 58 String docText = document.getDocumentText(); 59 60 byte [] bytes = docText.getBytes(); 61 int pos = 0, end = bytes.length; 62 63 try { 64 while (pos < end) { 65 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 
512 : end - pos)); 66 pos = pos + 512; 67 68 try { 69 while (this.indexerFeedback.available() > 0) 70 { byte b[] = new byte[this.indexerFeedback.available()]; 71 System.out.println("Feedback of " + this.indexerFeedback.available()); 72 this.indexerFeedback.read(b); 73 System.out.println(b); 74 } 75 } 76 catch (IOException ex) 77 { 78 } 79 80 81 try { 82 while (this.indexerErrors.available() > 0) 83 { byte b[] = new byte[this.indexerErrors.available()]; 84 System.out.println("Feedback of " + this.indexerErrors.available()); 85 this.indexerErrors.read(b); 86 System.out.println(new String(b)); 87 } 88 } 89 catch (IOException ex) 90 { 91 } 92 } 93 } 94 catch (IOException ex) 95 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end); 96 ex.printStackTrace(); 97 return false; 98 } 99 this.firstDocument = false; 100 101 try { 102 while (this.indexerErrors.available() > 0) 103 { char c = (char) this.indexerErrors.read(); 104 System.out.println(c); 105 } 106 while (this.indexerFeedback.available() > 0) 107 { byte b[] = new byte[this.indexerFeedback.available()]; 108 System.out.println("Feedback of " + this.indexerFeedback.available()); 109 this.indexerFeedback.read(b); 110 } 111 } 112 catch (IOException ex) 113 { 114 } 115 return true; 40 public boolean addIndex(String level, String field) 41 { return true; 42 } 43 44 /** 45 * Index a single document; the document interface can be used to extract individual 46 * metadata items etc. as required or desired and index those instead or as well as 47 * the body text of the document. 
48 */ 49 public boolean indexDocument(DocumentID docID, DocumentInterface document) 50 { 51 if (!this.firstDocument) 52 { // Send a '<document>' before the document itself 53 try { 54 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 55 } 56 catch (IOException ex) 57 { System.out.println("Bad output on end of document" + ex); 58 ex.printStackTrace(); 59 return false; 60 } 61 } 62 String docText = document.getDocumentText(); 63 64 byte [] bytes = docText.getBytes(); 65 int pos = 0, end = bytes.length; 66 67 try { 68 while (pos < end) { 69 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos)); 70 pos = pos + 512; 71 72 try { 73 while (this.indexerFeedback.available() > 0) 74 { byte b[] = new byte[this.indexerFeedback.available()]; 75 System.out.println("Feedback of " + this.indexerFeedback.available()); 76 this.indexerFeedback.read(b); 77 System.out.println(b); 78 } 116 79 } 117 118 /** 119 * Initialise the pass: open required files, check status 120 */ 121 public boolean startPass(int passNumber) 122 { this.pass = passNumber; 123 this.firstDocument = true; 124 125 try { 126 switch (this.pass) { 127 case 0: 128 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem + " -T1"); 129 break; 130 131 case 1: 132 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -T2"); 133 break; 134 135 case 2: 136 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I1"); 137 break; 138 139 case 3: 140 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 141 p.waitFor(); 142 143 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I2"); 144 break; 145 } 80 catch (IOException ex) 81 { 82 } 83 84 85 try { 86 while (this.indexerErrors.available() > 0) 87 { byte b[] = new byte[this.indexerErrors.available()]; 88 System.out.println("Feedback of " + this.indexerErrors.available()); 89 
this.indexerErrors.read(b); 90 System.out.println(new String(b)); 91 } 92 } 93 catch (IOException ex) 94 { 95 } 96 } 97 } 98 catch (IOException ex) 99 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end); 100 ex.printStackTrace(); 101 return false; 102 } 103 this.firstDocument = false; 104 105 try { 106 while (this.indexerErrors.available() > 0) 107 { char c = (char) this.indexerErrors.read(); 108 System.out.println(c); 109 } 110 while (this.indexerFeedback.available() > 0) 111 { byte b[] = new byte[this.indexerFeedback.available()]; 112 System.out.println("Feedback of " + this.indexerFeedback.available()); 113 this.indexerFeedback.read(b); 114 } 115 } 116 catch (IOException ex) 117 { 118 } 119 return true; 120 } 121 122 /** 123 * Initialise the pass: open required files, check status 124 */ 125 public boolean startPass(int passNumber) 126 { this.pass = passNumber; 127 this.firstDocument = true; 128 129 try { 130 switch (this.pass) { 131 case 0: 132 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem + " -T1"); 133 break; 134 135 case 1: 136 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -T2"); 137 break; 138 139 case 2: 140 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I1"); 141 break; 142 143 case 3: 144 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 145 p.waitFor(); 146 147 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes -f " + this.outputStem +" -I2"); 148 break; 149 } 146 150 147 148 149 150 151 152 {System.out.println(ex);153 154 155 156 157 {System.out.println(ex);158 159 160 161 162 163 164 165 166 167 168 169 {// TODO: end pass170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 {System.out.println(ex);230 
231 232 233 234 {System.out.println(ex);235 236 237 238 239 240 241 /** 242 243 244 151 this.indexerFeedback = mgpp_passes.getInputStream(); 152 this.indexerErrors = mgpp_passes.getErrorStream(); 153 this.indexerTextfeed = mgpp_passes.getOutputStream(); 154 } 155 catch (IOException ex) 156 { System.out.println(ex); 157 ex.printStackTrace(); 158 return false; 159 } 160 catch (InterruptedException ex) 161 { System.out.println(ex); 162 ex.printStackTrace(); 163 return false; 164 } 165 System.out.println("Pass " + this.pass); 166 return true; 167 } 168 169 /** 170 * Complete a pass - reset file counters, close files, etc. 171 */ 172 public boolean endPass(int passNumber) 173 { // TODO: end pass 174 Process p; 175 176 try { 177 this.indexerTextfeed.write((char) 2); 178 this.indexerTextfeed.write(4); 179 while (this.indexerErrors.available() > 0) 180 { char c = (char) this.indexerErrors.read(); 181 System.out.print(c); 182 } 183 while (this.indexerFeedback.available() > 0) 184 { byte b[] = new byte[this.indexerFeedback.available()]; 185 System.out.print("Feedback of " + this.indexerFeedback.available()); 186 this.indexerFeedback.read(b); 187 } 188 189 this.indexerTextfeed.close(); 190 Thread.sleep(1000); 191 this.mgpp_passes.waitFor(); 192 } 193 catch (IOException ex) 194 { System.out.println(ex); 195 } 196 catch (InterruptedException ex) 197 { System.out.println(ex); 198 } 199 System.out.println("Completed with " + this.mgpp_passes.exitValue()); 200 201 try { 202 switch (this.pass) 203 { 204 case 0: 205 System.out.println("Compressing dictionary"); 206 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120"); 207 p.waitFor(); 208 System.out.println(p.exitValue()); 209 break; 210 211 case 3: 212 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem); 213 p.waitFor(); 214 System.out.println(p.exitValue()); 215 216 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem); 217 p.waitFor(); 218 
System.out.println(p.exitValue()); 219 220 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory); 221 p.waitFor(); 222 System.out.println(p.exitValue()); 223 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory); 224 p.waitFor(); 225 System.out.println(p.exitValue()); 226 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory); 227 p.waitFor(); 228 System.out.println(p.exitValue()); 229 break; 230 } 231 } 232 catch (IOException ex) 233 { System.out.println(ex); 234 ex.printStackTrace(); 235 return false; 236 } 237 catch (InterruptedException ex) 238 { System.out.println(ex); 239 ex.printStackTrace(); 240 return false; 241 } 242 return true; 243 } 244 245 /** 246 * Do any tidying up 247 */ 248 public void tidyup() 245 249 { 246 247 248 249 250 251 252 {return 4;253 250 } 251 252 /** 253 * Return the number of passes required for this index. 254 */ 255 public int getNumberOfPasses() 256 { return 4; 257 } 254 258 }
Note: See TracChangeset for help on using the changeset viewer.