Changeset 6898


Ignore:
Timestamp:
2004-02-26T09:54:54+13:00 (20 years ago)
Author:
kjdon
Message:

Added a lot more checks for failed build commands; now uses the index name as the directory name, remembers which indexes were successfully built, and creates some service descriptions for the buildconfig file.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java

    r6736 r6898  
    1717import org.greenstone.gsdl3.gs3build.metadata.*;
    1818import org.greenstone.gsdl3.gs3build.xpointer.XPointer;
     19import org.greenstone.gsdl3.util.GSXML;
    1920
    2021public class MGIndexer extends AbstractIndexer
     
    4445
    4546  public static final String MG_INDEX_TYPE = "mg";
    46 
     47    public static final String INDEX_FILE_STEM = "index";
    4748  class MGIndex
    48   { String name;
    49     String level;
    50     String field;
     49  { String name=null;
     50    String level=null;
     51    String field=null;
     52      boolean error = false;
    5153
    5254    public MGIndex(String name, String level, String field)
     
    5456      this.level = level;
    5557      this.field = field;
     58      //this.error = false; // assume built until we get an error
    5659    }
    5760
     
    6265      { this.field = indexLabel.substring(colonAt+1);
    6366        this.level = indexLabel.substring(0, colonAt);
    64       }
    65       this.name = null;
     67    createIndexName();
     68      }
     69      //this.name = null;
     70      //this.error = false;
    6671    }
    6772
     
    7580
    7681    public String getName()
    77     { return this.name;
    78     }
     82    {
     83    if (this.name==null || this.name.equals("")) {
     84        createIndexName();
     85    }
     86    return this.name;
     87    }
     88
     89      public boolean hasError() {
     90      return this.error;
     91      }
     92      public void setError(boolean b) {
     93      this.error = b;
     94      }
     95
     96      private void createIndexName() {
     97      StringBuffer new_name = new StringBuffer();
     98      new_name.append(Character.toLowerCase((char) this.level.charAt(0)));
     99
     100      int c, w;
     101      w = 0;
     102      c = 0;
     103      while (c < this.field.length() && w < 2) {
     104          char ch = this.field.charAt(c);
     105
     106          ch = Character.toLowerCase(ch);
     107          if (Character.isLetter(ch)) {
     108          if (ch != 'a' && ch != 'e' && ch != 'i' &&
     109              ch != 'o' && ch != 'u') {
     110              new_name.append(ch);
     111              w++;
     112          }
     113          }
     114          c ++;
     115      }
     116      this.name = new_name.toString();
     117
     118      }
    79119  }
    80120
     
    92132  }
    93133
    94   private String getIndexDirectory(String level, String field)
    95   { StringBuffer directory = new StringBuffer();
    96     directory.append(Character.toLowerCase((char) level.charAt(0)));
    97 
    98     int c, w;
    99     w = 0;
    100     c = 0;
    101     while (c < field.length() && w < 2) {
    102       char ch = field.charAt(c);
    103 
    104       ch = Character.toLowerCase(ch);
    105       if (Character.isLetter(ch)) {
    106     if (ch != 'a' && ch != 'e' && ch != 'i' &&
    107         ch != 'o' && ch != 'u') {
    108       directory.append(ch);
    109       w++;
    110     }
    111       }
    112       c ++;
    113     }
    114     return directory.toString();
    115   }
     134//    private String getIndexDirectory(String level, String field)
     135//    { StringBuffer directory = new StringBuffer();
     136//      directory.append(Character.toLowerCase((char) level.charAt(0)));
     137
     138//      int c, w;
     139//      w = 0;
     140//      c = 0;
     141//      while (c < field.length() && w < 2) {
     142//        char ch = field.charAt(c);
     143
     144//        ch = Character.toLowerCase(ch);
     145//        if (Character.isLetter(ch)) {
     146//      if (ch != 'a' && ch != 'e' && ch != 'i' &&
     147//          ch != 'o' && ch != 'u') {
     148//        directory.append(ch);
     149//        w++;
     150//      }
     151//        }
     152//        c ++;
     153//      }
     154//      return directory.toString();
     155//    }
    116156
    117157  /**
     
    126166    if (label.equals(IndexerManager.outputDir)) {
    127167      this.outputDirectory = value;
    128       this.textStem   = value + "/text/index";
    129168      this.pass = 0;
    130169   
     
    139178    return false;
    140179      }
     180      this.textStem = this.textDirectory.getPath() + File.separator + INDEX_FILE_STEM;
    141181
    142182      // Sign to the user which mg directory is being used...
     
    159199  private Node recurseDOM(DocumentInterface metsDoc, Node node,
    160200              AbstractStructure structure, StringBuffer textBuffer,
    161               StringBuffer extraBuffer, String indexName,
    162               String namespace, String field)
     201              StringBuffer extraBuffer, String namespace)
     202              //String indexName, String namespace, String field)
    163203  {
    164204    // send out the ctrl-c...if this is
    165205    if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) {
    166       if ((indexName != null) && indexName.startsWith("s")) {
     206    if ((this.indexName != null) && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //indexName.startsWith("s")) {
    167207    METSDivision division = (METSDivision) structure;
    168208
     
    179219    }
    180220
    181     descriptive.addMetadata("gsdl3", "mgseqno", indexName + "." + Integer.toString(this.sectionSeqNo));
     221    descriptive.addMetadata("gsdl3", "mgseqno", this.indexName + "." + Integer.toString(this.sectionSeqNo));
    182222    metsDoc.setModified(true);
    183223    //  System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel());
     
    212252    descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
    213253    if (descriptive != null) {
    214       List values = descriptive.getMetadata(namespace, field);
     254      List values = descriptive.getMetadata(namespace, this.field);
    215255     
    216256      if (values != null) {
     
    240280      // while this node isn't the child's start node, produce the HTML node text, if
    241281      // in text field mode...
    242       if (field.equals("text")) {
     282      if (this.field.equals("text")) {
    243283    while (node != startNode) {
    244284      XPointer.printNode(node, textBuffer, false);
    245285
    246286      // print buffer to node
    247       node = XPointer.getNextNode(node, (field.equals("text") ? textBuffer : null));
     287      node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null));
    248288    }
    249289      }
    250290     
    251291      // recurse to child
    252       node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, indexName, namespace, field);
     292      node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // indexName, namespace, field);
    253293    }
    254294
     
    257297    if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) {
    258298      while (node != null) {
    259     if (field.equals("text")) {
     299    if (this.field.equals("text")) {
    260300      XPointer.printNode(node, textBuffer, false);
    261301    }
    262     node = XPointer.getNextNode(node, (field.equals("text") ? textBuffer : null));
     302    node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null));
    263303      }
    264304      /*
     
    270310  }
    271311
    272   private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure,
    273                 String indexName, String namespace, String field)
     312    private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace)
     313    //  String indexName, String namespace, String field)
    274314  { StringBuffer extraBuffer = new StringBuffer();
    275315    Node node = document.getDocumentElement();
    276316    StringBuffer textBuffer = new StringBuffer();
    277317
    278     this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, indexName, namespace, field);
     318    this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //indexName, namespace, field);
    279319    textBuffer.append(extraBuffer.toString());
    280320    return textBuffer.toString();
     
    313353      METSStructure sections = document.getDocumentStructure().getStructure("Section");
    314354      if (sections != null) {
    315     docText = this.prepareDOM(document, domDocument, sections, this.indexName, "gsdl3", this.field);
     355    docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.indexName, "gsdl3", this.field);
    316356    //  System.out.println(docText);
    317357      }
     
    429469
    430470      int indexNo = (this.pass - 2) / 2;
     471      MGIndex index = null;
    431472      if (this.pass >= 2) {
    432     MGIndex index = (MGIndex) this.indexes.get(indexNo);
    433      
    434     // attempt to ensure that the text subdirectory exists
    435     this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField()));
    436     if (!indexDirectory.exists()) {
    437       if (!indexDirectory.mkdir()) {
    438         return false;
    439       }
    440     }
    441     else if (!indexDirectory.isDirectory()) {
    442       return false;
    443     }
     473      index = (MGIndex) this.indexes.get(indexNo);
     474      if (index.hasError()) {
     475          // an error has already occurred for this index, don't continue
     476          System.out.println("pass "+this.pass+": aborted due to errors in the previous pass");
     477          return false;
     478      }
     479      // attempt to ensure that the text subdirectory exists
     480      //this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField()));
     481      this.indexDirectory = new File(outputDirectory, index.getName());
     482      if (!indexDirectory.exists()) {
     483          if (!indexDirectory.mkdir()) {
     484          return false;
     485          }
     486      }
     487      else if (!indexDirectory.isDirectory()) {
     488          return false;
     489      }
    444490
    445491    this.level = index.getLevel();
    446492    this.field = index.getField();
    447493    this.indexName = index.getName();
    448     if (this.indexName == null || this.indexName.length() == 0) {
    449       this.indexName = this.getIndexDirectory(index.getLevel(), index.getField());
    450     }
    451     this.indexStem = this.outputDirectory + File.separatorChar +
    452       this.indexName + File.separatorChar + "index"; // TODO: modify for index
     494    if (this.level == null || this.field == null ) {
     495        System.out.println("invalid index - level or field was null");
     496        return false;
     497    }
     498    //if (this.indexName == null || this.indexName.length() == 0) {
     499        //this.indexName = this.getIndexDirectory(index.getLevel(), index.getField());
     500    //}
     501    this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM;  // TODO: modify for index
    453502    if (this.pass % 2 == 1) {
    454       this.indexName = null;
     503        this.indexName = null; // why???
    455504    }
    456505      }
     
    468517
    469518      int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
    470 
     519     
    471520      try {
    472521    switch (mgPass) {
     
    484533       
    485534      case 3:
    486         Process p = Runtime.getRuntime().exec("mg_perf_hash_build -f index -d " + this.indexDirectory.toString());
    487         p.waitFor();
    488         if (p.exitValue() == 0) {
    489           System.out.println("Perfect hashes completed");
    490         }
    491        
    492535        mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -2 -c 3 -G -t 10 -N2");
    493536        break;
     
    501544      { System.out.println(ex);
    502545        ex.printStackTrace();
     546    index.setError(true);
    503547    return false;
    504548      }
    505       catch (InterruptedException ex)
    506       { System.out.println(ex);
    507         ex.printStackTrace();
    508     return false;
    509       }
     549//        catch (InterruptedException ex)
     550//        { System.out.println(ex);
     551//          ex.printStackTrace();
     552//      index.setError(true);
     553//      return false;
     554//        }
    510555      System.out.println("Pass " + this.pass);
    511556      return true;
     
    517562    public boolean endPass(int passNumber)
    518563    { Process p;
    519        
    520       try {
     564   
     565    int indexNo = (passNumber - 2) / 2;
     566    MGIndex index = null;
     567    if (passNumber >= 2) {
     568    index = (MGIndex) this.indexes.get(indexNo);
     569    }
     570    try {
    521571    this.indexerTextfeed.write(END_OF_DOCUMENT);
    522572    this.indexerTextfeed.write(END_OF_STREAM);
     
    541591      { System.out.println(ex);
    542592      }
    543       System.out.println("Pass " + this.pass + " completed with " + this.mg_passes.exitValue());
    544 
     593    int exitValue = this.mg_passes.exitValue();
     594      System.out.println("Pass " + this.pass + " completed with " + exitValue);
     595      if (exitValue !=0) {
     596      //assume something has gone wrong, don't continue
     597      if (index != null) {
     598          index.setError(true);
     599          return false;
     600      }
     601      }
    545602      int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
    546 
    547603      try {
    548604    switch (mgPass)
     
    554610        if (p.exitValue() != 0) {
    555611          System.out.println("Error from mg_compression_dict: " + p.exitValue());
     612         
     613          return false;
    556614        }
    557615        else {
     
    559617        }
    560618      break;
     619
     620    case 2:
     621        System.out.println("Creating perfect hash");
     622        p = Runtime.getRuntime().exec("mg_perf_hash_build -f index -d " + this.indexDirectory.toString());
     623        p.waitFor();
     624        if (p.exitValue() == 0) {
     625          System.out.println("Perfect hashes completed");
     626        } else {
     627        System.out.println("Unable to build the perfect hash");
     628        index.setError(true);
     629        return false;
     630        }
     631        break;
    561632
    562633      case 3:
     
    569640        else {
    570641          System.out.println("Unable to create weights file " + "mg_weights_build -f " + this.indexStem + " -t " + this.textStem + " -d /");
     642          index.setError(true);
     643          return false;
     644
    571645        }
    572646
     
    578652        else {
    579653          System.out.println("Unable to create inverted dictionary file");
     654          index.setError(true);
     655          return false;
     656
    580657        }
    581658       
     
    583660        p.waitFor();
    584661        if (p.exitValue() == 0) {
    585           System.out.println("Stemmed index successfully written");
     662          System.out.println("Stemmed index 1 successfully written");
    586663        }
    587664        else {
    588           System.out.println("Unable to create stemmed index");
     665          System.out.println("Unable to create stemmed index 1");
     666          index.setError(true);
     667          return false;
     668
    589669        }
    590670
     
    592672        p.waitFor();
    593673        if (p.exitValue() == 0) {
    594           System.out.println("Stemmed index successfully written");
     674          System.out.println("Stemmed index 2 successfully written");
    595675        }
    596676        else {
    597           System.out.println("Unable to create stemmed index");
     677          System.out.println("Unable to create stemmed index 2");
     678          index.setError(true);
     679          return false;
    598680        }
    599681
     
    601683        p.waitFor();
    602684        if (p.exitValue() == 0) {
    603           System.out.println("Stemmed index successfully written");
     685          System.out.println("Stemmed index 3 successfully written");
    604686        }
    605687        else {
    606           System.out.println("Unable to create stemmed index");
     688          System.out.println("Unable to create stemmed index 3");
     689          index.setError(true);
     690          return false;
    607691        }
    608692      break;
     
    612696      { System.out.println(ex);
    613697        ex.printStackTrace();
     698    index.setError(true);
    614699    return false;
    615700      }
     
    617702      { System.out.println(ex);
    618703        ex.printStackTrace();
     704    index.setError(true);
    619705    return false;
    620706      }
     
    635721    { return 2 + this.indexes.size() * 2;
    636722    }
     723
     724    public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) {
     725    System.out.println("adding service description, MGIndexer");
     726    Document doc = service_rack_list.getOwnerDocument();
     727
     728    // generate the list of indexes
     729    Element index_list = doc.createElement(GSXML.INDEX_ELEM+GSXML.LIST_MODIFIER);
     730    boolean found_index = false;
     731    String def_index = ""; // the default index will just be the first one created for now.
     732    for (int i=0; i<this.indexes.size(); i++) {
     733        MGIndex index = (MGIndex)this.indexes.get(i);
     734        if (!index.hasError()) {
     735        Element e = doc.createElement(GSXML.INDEX_ELEM);
     736        e.setAttribute(GSXML.NAME_ATT, index.getName());
     737        index_list.appendChild(e);
     738        if (found_index == false) {
     739            // this is the first index
     740            found_index = true;
     741            def_index = index.getName();
     742        }
     743        }
     744    }
     745   
     746    if (!found_index) {
     747        // no indexes were able to be created, so we can't use them or the text
     748        return false;
     749    }
     750    Element default_index = doc.createElement("defaultIndex");
     751    default_index.setAttribute(GSXML.NAME_ATT, def_index);
     752
     753    Element search_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
     754    Element retrieve_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
     755    service_rack_list.appendChild(search_service_elem);
     756    service_rack_list.appendChild(retrieve_service_elem);
     757
     758    search_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGSearch");
     759   
     760    search_service_elem.appendChild(index_list);
     761    search_service_elem.appendChild(default_index);
     762    retrieve_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGRetrieve");
     763    retrieve_service_elem.appendChild(default_index.cloneNode(true));
     764    return true;
     765    }
     766
    637767}
Note: See TracChangeset for help on using the changeset viewer.