Changeset 7308


Ignore:
Timestamp:
2004-05-11T15:28:00+12:00 (20 years ago)
Author:
kjdon
Message:

Changed the names of a couple of class variables to make it clearer what they are. Also, because the compressed text is always done by section, made sure that the section mg numbers are added into the database. One instantiation of the indexer should always use the same sectioning (if you want parallel structures, use two <search> elements), so the mg numbers now all use the same name - the overall index name. Each section therefore has only one mg index number, based on the overall name for the search element, rather than one number per index.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java

    r6898 r7308  
    3535  String       textStem;
    3636  List         indexes;
    37   String       indexName;
    38   String       level;
    39   String       field;
    40   String       name;
     37    String       overallName;
     38   
     39    String       currentIndexName;
     40    String       currentIndexLevel;
     41    String       currentIndexField;
     42
    4143
    4244  static final char END_OF_DOCUMENT = (char) 2;
     
    117119
    118120      }
    119   }
     121  } // MGIndex
    120122
    121123  public MGIndexer(String name)
    122124  { this.indexes = new ArrayList();
    123     this.name    = name;
     125    this.overallName = name;
    124126  }
    125127
     
    129131
    130132  public String getName()
    131   { return this.name;
    132   }
     133    { return this.overallName;
     134    }
    133135
    134136//    private String getIndexDirectory(String level, String field)
     
    200202              AbstractStructure structure, StringBuffer textBuffer,
    201203              StringBuffer extraBuffer, String namespace)
    202               //String indexName, String namespace, String field)
     204              //String name, String namespace, String field)
    203205  {
    204206    // send out the ctrl-c...if this is
    205207    if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) {
    206     if ((this.indexName != null) && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //indexName.startsWith("s")) {
    207     METSDivision division = (METSDivision) structure;
    208 
    209     // get the division metadata block
    210     METSDescriptive descriptive;
    211     String metadataId = division.getDefaultMetadataReference();
    212     if (metadataId == null) {
    213       descriptive = metsDoc.getDocumentMetadata().createDescriptive(division.getLabel());
    214       division.addMetadataReference(descriptive.getID());
     208    // try doing this for all index types
     209    if ((this.currentIndexName != null)) { // && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //name.startsWith("s")) {
     210        METSDivision division = (METSDivision) structure;
     211
     212        // get the division metadata block
     213        METSDescriptive descriptive;
     214        String metadataId = division.getDefaultMetadataReference();
     215        if (metadataId == null) {
     216        descriptive = metsDoc.getDocumentMetadata().createDescriptive(division.getLabel());
     217        division.addMetadataReference(descriptive.getID());
     218        }
     219        else {
     220        // Get the descriptive item...
     221        descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
     222        }
     223       
     224        descriptive.addMetadata("gsdl3", "mgseqno", this.overallName + "." + Integer.toString(this.sectionSeqNo));
     225        metsDoc.setModified(true);
     226        //  System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel());
     227    } // section level
     228   
     229    // append an 'end of section' marker
     230    textBuffer.append(END_OF_SECTION);
     231    this.sectionSeqNo ++;
     232   
     233    // for document-level indexes, always append an 'end of document' tag at the
     234    // end of the document for each section.  Otherwise, each section is followed
     235    // by an end of document character.  This ensures that all indexes use the
     236    // same document numbering...
     237    if (this.currentIndexLevel == null ||
     238        this.currentIndexLevel.equals(IndexerInterface.DOCUMENT_LEVEL)) {
     239        extraBuffer.append(END_OF_DOCUMENT);
    215240    }
    216241    else {
    217       // Get the descriptive item...
    218       descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
    219     }
    220 
    221     descriptive.addMetadata("gsdl3", "mgseqno", this.indexName + "." + Integer.toString(this.sectionSeqNo));
    222     metsDoc.setModified(true);
    223     //  System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel());
    224       }
    225      
    226       // append an 'end of section' marker
    227       textBuffer.append(END_OF_SECTION);
    228       this.sectionSeqNo ++;
    229 
    230       // for document-level indexes, always append an 'end of document' tag at the
    231       // end of the document for each section.  Otherwise, each section is followed
    232       // by an end of document character.  This ensures that all indexes use the
    233       // same document numbering...
    234       if (this.level == null ||
    235       this.level.equals(IndexerInterface.DOCUMENT_LEVEL)) {
    236     extraBuffer.append(END_OF_DOCUMENT);
    237       }
    238       else {
    239     textBuffer.append(END_OF_DOCUMENT);
    240     this.documentSeqNo ++;
    241       }
    242 
    243       // produce the body here for metadata output of divisions - in the case of
     242        textBuffer.append(END_OF_DOCUMENT);
     243        this.documentSeqNo ++;
     244    }
     245   
     246    // produce the body here for metadata output of divisions - in the case of
    244247      // text output, that will happen below...
    245       if (!this.field.equals("text"))
     248      if (!this.currentIndexField.equals("text"))
    246249      { METSDescriptive descriptive;
    247250   
     
    252255    descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
    253256    if (descriptive != null) {
    254       List values = descriptive.getMetadata(namespace, this.field);
     257      List values = descriptive.getMetadata(namespace, this.currentIndexField);
    255258     
    256259      if (values != null) {
     
    261264          textBuffer.append(value);
    262265          if (valueIter.hasNext()) {
    263         textBuffer.append(END_OF_SECTION);
     266          textBuffer.append(END_OF_SECTION);
    264267          }
    265268        }
     
    280283      // while this node isn't the child's start node, produce the HTML node text, if
    281284      // in text field mode...
    282       if (this.field.equals("text")) {
     285      if (this.currentIndexField.equals("text")) {
    283286    while (node != startNode) {
    284287      XPointer.printNode(node, textBuffer, false);
    285288
    286289      // print buffer to node
    287       node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null));
     290      node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
    288291    }
    289292      }
    290293     
    291294      // recurse to child
    292       node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // indexName, namespace, field);
     295      node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // name, namespace, field);
    293296    }
    294297
     
    297300    if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) {
    298301      while (node != null) {
    299     if (this.field.equals("text")) {
     302    if (this.currentIndexField.equals("text")) {
    300303      XPointer.printNode(node, textBuffer, false);
    301304    }
    302     node = XPointer.getNextNode(node, (this.field.equals("text") ? textBuffer : null));
     305    node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
    303306      }
    304307      /*
     
    308311    }
    309312    return node;
    310   }
     313    }
    311314
    312315    private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace)
    313     //  String indexName, String namespace, String field)
     316    //  String name, String namespace, String field)
    314317  { StringBuffer extraBuffer = new StringBuffer();
    315318    Node node = document.getDocumentElement();
    316319    StringBuffer textBuffer = new StringBuffer();
    317320
    318     this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //indexName, namespace, field);
     321    this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //name, namespace, field);
    319322    textBuffer.append(extraBuffer.toString());
    320323    return textBuffer.toString();
     
    353356      METSStructure sections = document.getDocumentStructure().getStructure("Section");
    354357      if (sections != null) {
    355     docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.indexName, "gsdl3", this.field);
     358    docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field);
    356359    //  System.out.println(docText);
    357360      }
    358361    }
    359362    if (docText == null) {
    360       if (this.field.equals("text")) {
    361     docText = Character.toString(END_OF_DOCUMENT) + Character.toString(END_OF_SECTION) +
     363      if (this.currentIndexField.equals("text")) {
     364      docText = Character.toString(END_OF_DOCUMENT) + Character.toString(END_OF_SECTION) +
    362365      document.getDocumentText();
    363366      }
     
    366369    textBuffer.append(END_OF_DOCUMENT);
    367370    textBuffer.append(END_OF_SECTION);
    368     List values = document.getDocumentMetadataItem("gsdl3", this.field);
     371    List values = document.getDocumentMetadataItem("gsdl3", this.currentIndexField);
    369372    if (values != null) {
    370373      Iterator valueIter = values.iterator();
     
    374377        textBuffer.append(value);
    375378        if (valueIter.hasNext()) {
    376           textBuffer.append(END_OF_SECTION);
     379        textBuffer.append(END_OF_SECTION);
    377380          //          sectionSeqNo ++;
    378381        }
     
    432435    }
    433436
    434     // remember that we're not on the first document, assign the sequence number
    435     // on the first pass only, and increment the sequence number.
     437    // remember that we're not on the first document,
    436438    this.firstDocument = false;
     439    // assign the sequence number on the first pass only, and increment the sequence number.
    437440    if (this.pass == 0) {
    438       document.addDocumentMetadata("gsdl3", "mgseqno", "dtx."+Integer.toString(startSeqNo));
     441    //document.addDocumentMetadata("gsdl3", "mgseqno", "dtx."+Integer.toString(startSeqNo));
     442    document.addDocumentMetadata("gsdl3", "mgseqno", this.overallName+"."+Integer.toString(startSeqNo));
    439443      //System.out.println("Assigning " + startSeqNo + " to " + document.getID());
    440444    }
     
    489493      }
    490494
    491     this.level = index.getLevel();
    492     this.field = index.getField();
    493     this.indexName = index.getName();
    494     if (this.level == null || this.field == null ) {
     495      this.currentIndexLevel = index.getLevel();
     496      this.currentIndexField = index.getField();
     497      this.currentIndexName = index.getName();
     498     
     499    if (this.currentIndexLevel == null || this.currentIndexField == null ) {
    495500        System.out.println("invalid index - level or field was null");
    496501        return false;
    497502    }
    498     //if (this.indexName == null || this.indexName.length() == 0) {
    499         //this.indexName = this.getIndexDirectory(index.getLevel(), index.getField());
    500     //}
     503    //if (this.currentIndexName == null || this.currentIndexName.length() == 0) {
     504    // this.currentIndexName = getIndexDirectory(index.getLevel(), index.getField());
     505    //  }
    501506    this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM;  // TODO: modify for index
    502507    if (this.pass % 2 == 1) {
    503         this.indexName = null; // why???
     508        this.currentIndexName = null; // why???
    504509    }
    505510      }
    506511      else {
    507     this.field = "text";
    508     this.level = "section";
    509     this.indexName = null;
    510       }
    511       System.out.println("level is " + this.level);
    512       System.out.println("field is " + this.field);
    513       System.out.println("index name is " + this.indexName);
     512     
     513      this.currentIndexField = "text";
     514      this.currentIndexLevel = "section";
     515      this.currentIndexName = null;
     516      }
    514517     
    515518      // get the parameters for this execution of mg_passes
     
    547550    return false;
    548551      }
    549 //        catch (InterruptedException ex)
     552      //        catch (InterruptedException ex)
    550553//        { System.out.println(ex);
    551554//          ex.printStackTrace();
     
    556559      return true;
    557560    }
     561 
    558562
    559563    /**
     
    562566    public boolean endPass(int passNumber)
    563567    { Process p;
    564    
    565     int indexNo = (passNumber - 2) / 2;
    566     MGIndex index = null;
     568   
     569    int indexNo = (passNumber - 2) / 2;
     570    MGIndex index = null;
    567571    if (passNumber >= 2) {
    568572    index = (MGIndex) this.indexes.get(indexNo);
     
    584588    Thread.sleep(1000);
    585589    this.mg_passes.waitFor();
    586       }
    587       catch (IOException ex)
     590    }
     591    catch (IOException ex)
    588592      { System.out.println(ex);
    589593      }
     
    592596      }
    593597    int exitValue = this.mg_passes.exitValue();
    594       System.out.println("Pass " + this.pass + " completed with " + exitValue);
     598    System.out.println("Pass " + this.pass + " completed with " + exitValue);
    595599      if (exitValue !=0) {
    596600      //assume something has gone wrong, don't continue
     
    764768    return true;
    765769    }
    766 
     770   
    767771}
     772   
Note: See TracChangeset for help on using the changeset viewer.