Changeset 6898
- Timestamp:
- 2004-02-26T09:54:54+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
r6736 r6898 17 17 import org.greenstone.gsdl3.gs3build.metadata.*; 18 18 import org.greenstone.gsdl3.gs3build.xpointer.XPointer; 19 import org.greenstone.gsdl3.util.GSXML; 19 20 20 21 public class MGIndexer extends AbstractIndexer … … 44 45 45 46 public static final String MG_INDEX_TYPE = "mg"; 46 47 public static final String INDEX_FILE_STEM = "index"; 47 48 class MGIndex 48 { String name; 49 String level; 50 String field; 49 { String name=null; 50 String level=null; 51 String field=null; 52 boolean error = false; 51 53 52 54 public MGIndex(String name, String level, String field) … … 54 56 this.level = level; 55 57 this.field = field; 58 //this.error = false; // assume built until we get an error 56 59 } 57 60 … … 62 65 { this.field = indexLabel.substring(colonAt+1); 63 66 this.level = indexLabel.substring(0, colonAt); 64 } 65 this.name = null; 67 createIndexName(); 68 } 69 //this.name = null; 70 //this.error = false; 66 71 } 67 72 … … 75 80 76 81 public String getName() 77 { return this.name; 78 } 82 { 83 if (this.name==null || this.name.equals("")) { 84 createIndexName(); 85 } 86 return this.name; 87 } 88 89 public boolean hasError() { 90 return this.error; 91 } 92 public void setError(boolean b) { 93 this.error = b; 94 } 95 96 private void createIndexName() { 97 StringBuffer new_name = new StringBuffer(); 98 new_name.append(Character.toLowerCase((char) this.level.charAt(0))); 99 100 int c, w; 101 w = 0; 102 c = 0; 103 while (c < this.field.length() && w < 2) { 104 char ch = this.field.charAt(c); 105 106 ch = Character.toLowerCase(ch); 107 if (Character.isLetter(ch)) { 108 if (ch != 'a' && ch != 'e' && ch != 'i' && 109 ch != 'o' && ch != 'u') { 110 new_name.append(ch); 111 w++; 112 } 113 } 114 c ++; 115 } 116 this.name = new_name.toString(); 117 118 } 79 119 } 80 120 … … 92 132 } 93 133 94 private String getIndexDirectory(String level, String field)95 { StringBuffer directory = new StringBuffer();96 directory.append(Character.toLowerCase((char) level.charAt(0)));97 98 int c, w;99 w = 0;100 c = 0;101 while (c < field.length() && w < 2) {102 char ch = field.charAt(c);103 104 ch = Character.toLowerCase(ch);105 if (Character.isLetter(ch)) {106 if (ch != 'a' && ch != 'e' && ch != 'i' &&107 ch != 'o' && ch != 'u') {108 directory.append(ch);109 w++;110 }111 }112 c ++;113 }114 return directory.toString();115 }134 // private String getIndexDirectory(String level, String field) 135 // { StringBuffer directory = new StringBuffer(); 136 // directory.append(Character.toLowerCase((char) level.charAt(0))); 137 138 // int c, w; 139 // w = 0; 140 // c = 0; 141 // while (c < field.length() && w < 2) { 142 // char ch = field.charAt(c); 143 144 // ch = Character.toLowerCase(ch); 145 // if (Character.isLetter(ch)) { 146 // if (ch != 'a' && ch != 'e' && ch != 'i' && 147 // ch != 'o' && ch != 'u') { 148 // directory.append(ch); 149 // w++; 150 // } 151 // } 152 // c ++; 153 // } 154 // return directory.toString(); 155 // } 116 156 117 157 /** … … 126 166 if (label.equals(IndexerManager.outputDir)) { 127 167 this.outputDirectory = value; 128 this.textStem = value + "/text/index";129 168 this.pass = 0; 130 169 … … 139 178 return false; 140 179 } 180 this.textStem = this.textDirectory.getPath() + File.separator + INDEX_FILE_STEM; 141 181 142 182 // Sign to the user which mg directory is being used... … … 159 199 private Node recurseDOM(DocumentInterface metsDoc, Node node, 160 200 AbstractStructure structure, StringBuffer textBuffer, 161 StringBuffer extraBuffer, String indexName,162 String namespace, String field)201 StringBuffer extraBuffer, String namespace) 202 //String indexName, String namespace, String field) 163 203 { 164 204 // send out the ctrl-c...if this is 165 205 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) { 166 if ((indexName != null) &&indexName.startsWith("s")) {206 if ((this.indexName != null) && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //indexName.startsWith("s")) { 167 207 METSDivision division = (METSDivision) structure; 168 208 … … 179 219 } 180 220 181 descriptive.addMetadata("gsdl3", "mgseqno", indexName + "." + Integer.toString(this.sectionSeqNo));221 descriptive.addMetadata("gsdl3", "mgseqno", this.indexName + "." + Integer.toString(this.sectionSeqNo)); 182 222 metsDoc.setModified(true); 183 223 // System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel()); … … 212 252 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 213 253 if (descriptive != null) { 214 List values = descriptive.getMetadata(namespace, field);254 List values = descriptive.getMetadata(namespace, this.field); 215 255 216 256 if (values != null) { … … 240 280 // while this node isn't the child's start node, produce the HTML node text, if 241 281 // in text field mode... 242 if ( field.equals("text")) {282 if (this.field.equals("text")) { 243 283 while (node != startNode) { 244 284 XPointer.printNode(node, textBuffer, false); 245 285 246 286 // print buffer to node 247 node = XPointer.getNextNode(node, ( field.equals("text") ? textBuffer : null));287 node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null)); 248 288 } 249 289 } 250 290 251 291 // recurse to child 252 node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, indexName, namespace, field);292 node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // indexName, namespace, field); 253 293 } 254 294 … … 257 297 if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) { 258 298 while (node != null) { 259 if ( field.equals("text")) {299 if (this.field.equals("text")) { 260 300 XPointer.printNode(node, textBuffer, false); 261 301 } 262 node = XPointer.getNextNode(node, ( field.equals("text") ? textBuffer : null));302 node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null)); 263 303 } 264 304 /* … … 270 310 } 271 311 272 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure,273 312 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace) 313 // String indexName, String namespace, String field) 274 314 { StringBuffer extraBuffer = new StringBuffer(); 275 315 Node node = document.getDocumentElement(); 276 316 StringBuffer textBuffer = new StringBuffer(); 277 317 278 this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, indexName, namespace, field);318 this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //indexName, namespace, field); 279 319 textBuffer.append(extraBuffer.toString()); 280 320 return textBuffer.toString(); … … 313 353 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 314 354 if (sections != null) { 315 docText = this.prepareDOM(document, domDocument, sections, this.indexName, "gsdl3", this.field);355 docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.indexName, "gsdl3", this.field); 316 356 // System.out.println(docText); 317 357 } … … 429 469 430 470 int indexNo = (this.pass - 2) / 2; 471 MGIndex index = null; 431 472 if (this.pass >= 2) { 432 MGIndex index = (MGIndex) this.indexes.get(indexNo); 433 434 // attempt to ensure that the text subdirectory exists 435 this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField())); 436 if (!indexDirectory.exists()) { 437 if (!indexDirectory.mkdir()) { 438 return false; 439 } 440 } 441 else if (!indexDirectory.isDirectory()) { 442 return false; 443 } 473 index = (MGIndex) this.indexes.get(indexNo); 474 if (index.hasError()) { 475 // an error has already occurred for this index, don't continue 476 System.out.println("pass "+this.pass+": aborted due to errors in the previous pass"); 477 return false; 478 } 479 // attempt to ensure that the text subdirectory exists 480 //this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField())); 481 this.indexDirectory = new File(outputDirectory, index.getName()); 482 if (!indexDirectory.exists()) { 483 if (!indexDirectory.mkdir()) { 484 return false; 485 } 486 } 487 else if (!indexDirectory.isDirectory()) { 488 return false; 489 } 444 490 445 491 this.level = index.getLevel(); 446 492 this.field = index.getField(); 447 493 this.indexName = index.getName(); 448 if (this.indexName == null || this.indexName.length() == 0) { 449 this.indexName = this.getIndexDirectory(index.getLevel(), index.getField()); 450 } 451 this.indexStem = this.outputDirectory + File.separatorChar + 452 this.indexName + File.separatorChar + "index"; // TODO: modify for index 494 if (this.level == null || this.field == null ) { 495 System.out.println("invalid index - level or field was null"); 496 return false; 497 } 498 //if (this.indexName == null || this.indexName.length() == 0) { 499 //this.indexName = this.getIndexDirectory(index.getLevel(), index.getField()); 500 //} 501 this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM; // TODO: modify for index 453 502 if (this.pass % 2 == 1) { 454 this.indexName = null;503 this.indexName = null; // why??? 455 504 } 456 505 } … … 468 517 469 518 int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2); 470 519 471 520 try { 472 521 switch (mgPass) { … … 484 533 485 534 case 3: 486 Process p = Runtime.getRuntime().exec("mg_perf_hash_build -f index -d " + this.indexDirectory.toString());487 p.waitFor();488 if (p.exitValue() == 0) {489 System.out.println("Perfect hashes completed");490 }491 492 535 mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -2 -c 3 -G -t 10 -N2"); 493 536 break; … … 501 544 { System.out.println(ex); 502 545 ex.printStackTrace(); 546 index.setError(true); 503 547 return false; 504 548 } 505 catch (InterruptedException ex) 506 { System.out.println(ex); 507 ex.printStackTrace(); 508 return false; 509 } 549 // catch (InterruptedException ex) 550 // { System.out.println(ex); 551 // ex.printStackTrace(); 552 // index.setError(true); 553 // return false; 554 // } 510 555 System.out.println("Pass " + this.pass); 511 556 return true; … … 517 562 public boolean endPass(int passNumber) 518 563 { Process p; 519 520 try { 564 565 int indexNo = (passNumber - 2) / 2; 566 MGIndex index = null; 567 if (passNumber >= 2) { 568 index = (MGIndex) this.indexes.get(indexNo); 569 } 570 try { 521 571 this.indexerTextfeed.write(END_OF_DOCUMENT); 522 572 this.indexerTextfeed.write(END_OF_STREAM); … … 541 591 { System.out.println(ex); 542 592 } 543 System.out.println("Pass " + this.pass + " completed with " + this.mg_passes.exitValue()); 544 593 int exitValue = this.mg_passes.exitValue(); 594 System.out.println("Pass " + this.pass + " completed with " + exitValue); 595 if (exitValue !=0) { 596 //assume something has gone wrong, don't continue 597 if (index != null) { 598 index.setError(true); 599 return false; 600 } 601 } 545 602 int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2); 546 547 603 try { 548 604 switch (mgPass) … … 554 610 if (p.exitValue() != 0) { 555 611 System.out.println("Error from mg_compression_dict: " + p.exitValue()); 612 613 return false; 556 614 } 557 615 else { … … 559 617 } 560 618 break; 619 620 case 2: 621 System.out.println("Creating perfect hash"); 622 p = Runtime.getRuntime().exec("mg_perf_hash_build -f index -d " + this.indexDirectory.toString()); 623 p.waitFor(); 624 if (p.exitValue() == 0) { 625 System.out.println("Perfect hashes completed"); 626 } else { 627 System.out.println("Unable to build the perfect hash"); 628 index.setError(true); 629 return false; 630 } 631 break; 561 632 562 633 case 3: … … 569 640 else { 570 641 System.out.println("Unable to create weights file " + "mg_weights_build -f " + this.indexStem + " -t " + this.textStem + " -d /"); 642 index.setError(true); 643 return false; 644 571 645 } 572 646 … … 578 652 else { 579 653 System.out.println("Unable to create inverted dictionary file"); 654 index.setError(true); 655 return false; 656 580 657 } 581 658 … … 583 660 p.waitFor(); 584 661 if (p.exitValue() == 0) { 585 System.out.println("Stemmed index successfully written");662 System.out.println("Stemmed index 1 successfully written"); 586 663 } 587 664 else { 588 System.out.println("Unable to create stemmed index"); 665 System.out.println("Unable to create stemmed index 1"); 666 index.setError(true); 667 return false; 668 589 669 } 590 670 … … 592 672 p.waitFor(); 593 673 if (p.exitValue() == 0) { 594 System.out.println("Stemmed index successfully written");674 System.out.println("Stemmed index 2 successfully written"); 595 675 } 596 676 else { 597 System.out.println("Unable to create stemmed index"); 677 System.out.println("Unable to create stemmed index 2"); 678 index.setError(true); 679 return false; 598 680 } 599 681 … … 601 683 p.waitFor(); 602 684 if (p.exitValue() == 0) { 603 System.out.println("Stemmed index successfully written");685 System.out.println("Stemmed index 3 successfully written"); 604 686 } 605 687 else { 606 System.out.println("Unable to create stemmed index"); 688 System.out.println("Unable to create stemmed index 3"); 689 index.setError(true); 690 return false; 607 691 } 608 692 break; … … 612 696 { System.out.println(ex); 613 697 ex.printStackTrace(); 698 index.setError(true); 614 699 return false; 615 700 } … … 617 702 { System.out.println(ex); 618 703 ex.printStackTrace(); 704 index.setError(true); 619 705 return false; 620 706 } … … 635 721 { return 2 + this.indexes.size() * 2; 636 722 } 723 724 public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) { 725 System.out.println("adding service description, MGIndexer"); 726 Document doc = service_rack_list.getOwnerDocument(); 727 728 // generate the list of indexes 729 Element index_list = doc.createElement(GSXML.INDEX_ELEM+GSXML.LIST_MODIFIER); 730 boolean found_index = false; 731 String def_index = ""; // the default index will just be the first one created for now. 732 for (int i=0; i<this.indexes.size(); i++) { 733 MGIndex index = (MGIndex)this.indexes.get(i); 734 if (!index.hasError()) { 735 Element e = doc.createElement(GSXML.INDEX_ELEM); 736 e.setAttribute(GSXML.NAME_ATT, index.getName()); 737 index_list.appendChild(e); 738 if (found_index == false) { 739 // this is the first index 740 found_index = true; 741 def_index = index.getName(); 742 } 743 } 744 } 745 746 if (!found_index) { 747 // no indexes were able to be created, so we can't use them or the text 748 return false; 749 } 750 Element default_index = doc.createElement("defaultIndex"); 751 default_index.setAttribute(GSXML.NAME_ATT, def_index); 752 753 Element search_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM); 754 Element retrieve_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM); 755 service_rack_list.appendChild(search_service_elem); 756 service_rack_list.appendChild(retrieve_service_elem); 757 758 search_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGSearch"); 759 760 search_service_elem.appendChild(index_list); 761 search_service_elem.appendChild(default_index); 762 retrieve_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGRetrieve"); 763 retrieve_service_elem.appendChild(default_index.cloneNode(true)); 764 return true; 765 } 766 637 767 }
Note:
See TracChangeset
for help on using the changeset viewer.