- Timestamp:
- 2004-01-06T11:46:04+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
r6283 r6349 35 35 List indexes; 36 36 String level; 37 38 public static final String MG_INDEX = "Index"; 37 String field; 39 38 40 39 class MGIndex … … 42 41 String field; 43 42 43 public MGIndex(String level, String field) 44 { this.level = level; 45 this.field = field; 46 } 47 44 48 public MGIndex(String indexLabel) 45 49 { int colonAt = indexLabel.indexOf(':'); 50 46 51 if (colonAt >= 0) 47 52 { field = indexLabel.substring(colonAt+1); 48 53 level = indexLabel.substring(0, colonAt); 49 }50 else51 {52 54 } 53 55 } … … 91 93 /** 92 94 * The output directory should be (collection)/building/text/ for 93 * normal Greenstone builds 95 * normal Greenstone builds. 96 * 97 * @param <code>String</code> the label to configure 98 * @param <code>String</code> the value... 94 99 */ 95 100 public boolean configure(String label, String value) … … 114 119 System.out.println("Output MG directory is " + this.textStem); 115 120 } 116 else if (label.equals( MG_INDEX)) {121 else if (label.equals(IndexerInterface.GS2_INDEX_LABEL)) { 117 122 this.indexes.add(new MGIndex(value)); 118 123 } … … 121 126 } 122 127 128 public boolean addIndex(String level, String field) 129 { 130 MGIndex index = new MGIndex(level, field); 131 this.indexes.add(index); 132 return true; 133 } 134 123 135 private Node recurseDOM(DocumentInterface metsDoc, Node node, 124 AbstractStructure structure, StringBuffer buffer) 136 AbstractStructure structure, StringBuffer buffer, 137 String namespace, String field) 125 138 { 126 139 // send out the ctrl-c...if this is … … 146 159 buffer.append((char) 3); 147 160 if (this.level != null && 148 this.level.equals( "section")) {161 this.level.equals(IndexerInterface.SECTION_LEVEL)) { 149 162 buffer.append((char) 2); 150 163 } … … 152 165 } 153 166 154 // go through our children asrequired...167 // go through our children if required... 155 168 Iterator children = structure.getChildIterator(); 156 169 while (children.hasNext()) { 157 170 AbstractStructure child = (AbstractStructure) children.next(); 158 171 159 172 // get xpointer for child 160 173 // get start position node 161 174 Node startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child); 162 163 // while this node isn't the child's start node, produce the node 164 while (node != startNode) { 165 XPointer.printNode(node, buffer, false); 166 // print buffer to node 167 node = XPointer.getNextNode(node, buffer); 168 } 169 175 176 // while this node isn't the child's start node, produce the node text 177 if (field.equals("text")) { 178 while (node != startNode) { 179 XPointer.printNode(node, buffer, false); 180 181 // print buffer to node 182 node = XPointer.getNextNode(node, (field.equals("text") ? buffer : null)); 183 } 184 } 185 170 186 // recurse to child 171 this.recurseDOM(metsDoc, node, child, buffer );187 this.recurseDOM(metsDoc, node, child, buffer, namespace, field); 172 188 } 173 189 … … 176 192 if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) { 177 193 while (node != null) { 178 XPointer.printNode(node, buffer, false); 179 node = XPointer.getNextNode(node, buffer); 194 if (field.equals("text")) { 195 XPointer.printNode(node, buffer, false); 196 } 197 else { 198 METSDescriptive descriptive; 199 200 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) { 201 METSDivision division = (METSDivision) structure; 202 203 String metadataId = division.getDefaultMetadataReference(); 204 205 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 206 if (descriptive != null) { 207 List values = descriptive.getMetadata(namespace, field); 208 209 Iterator valueIter = values.iterator(); 210 while (valueIter.hasNext()) { 211 String value = valueIter.next().toString(); 212 213 buffer.append(value); 214 if (valueIter.hasNext()) { 215 buffer.append((char) 3); 216 } 217 } 218 } 219 } 220 } 221 node = XPointer.getNextNode(node, (field.equals("text") ? buffer : null)); 180 222 } 181 223 buffer.append((char) 3); … … 185 227 } 186 228 187 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure )229 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace, String field) 188 230 { Node node = document.getDocumentElement(); 189 231 StringBuffer textBuffer = new StringBuffer(); 190 232 191 this.recurseDOM(metsDoc, node, structure, textBuffer );233 this.recurseDOM(metsDoc, node, structure, textBuffer, namespace, field); 192 234 return textBuffer.toString(); 193 235 } … … 217 259 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 218 260 if (sections != null) { 219 docText = this.prepareDOM(document, domDocument, sections );261 docText = this.prepareDOM(document, domDocument, sections, "gsdl3", this.field); 220 262 // System.out.println(docText); 221 263 } … … 316 358 317 359 this.indexStem = this.outputDirectory + File.separatorChar + 318 this.getIndexDirectory( "document", "text") +360 this.getIndexDirectory(index.getLevel(), index.getField()) + 319 361 File.separatorChar + "index"; // TODO: modify for index 320 362 this.level = index.getLevel(); 363 this.field = index.getField(); 364 } 365 else { 366 this.field = "text"; 321 367 } 322 368 … … 343 389 Process p = Runtime.getRuntime().exec("mg_perf_hash_build -f index -d " + this.indexDirectory.toString()); 344 390 p.waitFor(); 345 System.out.println(p.exitValue()); 391 if (p.exitValue() == 0) { 392 System.out.println("Perfect hashes completed"); 393 } 346 394 347 395 mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -2 -c 3 -G -t 10 -N2"); … … 402 450 try { 403 451 switch (mgPass) 404 { 452 { 405 453 case 0: 406 454 System.out.println("Compressing dictionary"); … … 413 461 414 462 case 3: 463 System.out.println("Writing weights file"); 415 464 p = Runtime.getRuntime().exec("mg_weights_build -f " + this.indexStem + " -t " + this.textStem + " -d /"); 416 465 p.waitFor(); 417 System.out.println(p.exitValue()); 466 if (p.exitValue() == 0) { 467 System.out.println("Weights file successfully written"); 468 } 469 else { 470 System.out.println("Unable to create weights file"); 471 } 418 472 419 473 p = Runtime.getRuntime().exec("mg_invf_dict -f index -d " + this.indexDirectory.toString()); 420 474 p.waitFor(); 421 System.out.println(p.exitValue()); 475 if (p.exitValue() == 0) { 476 System.out.println("Inverted dictionary file successfully written"); 477 } 478 else { 479 System.out.println("Unable to create inverted dictionary file"); 480 } 422 481 423 482 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s1 -f index -d " + this.indexDirectory.toString()); 424 483 p.waitFor(); 425 System.out.println(p.exitValue()); 484 if (p.exitValue() == 0) { 485 System.out.println("Stemmed index successfully written"); 486 } 487 else { 488 System.out.println("Unable to create stemmed index"); 489 } 490 426 491 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s2 -f index -d " + this.indexDirectory.toString()); 427 492 p.waitFor(); 428 System.out.println(p.exitValue()); 493 if (p.exitValue() == 0) { 494 System.out.println("Stemmed index successfully written"); 495 } 496 else { 497 System.out.println("Unable to create stemmed index"); 498 } 499 429 500 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s3 -f index -d " + this.indexDirectory.toString()); 430 501 p.waitFor(); 431 System.out.println(p.exitValue()); 502 if (p.exitValue() == 0) { 503 System.out.println("Stemmed index successfully written"); 504 } 505 else { 506 System.out.println("Unable to create stemmed index"); 507 } 432 508 break; 433 509 }
Note:
See TracChangeset
for help on using the changeset viewer.