Context Navigation

← Previous Changeset
Next Changeset →

Changeset 8966

Timestamp:

2005-02-04T12:11:45+13:00 (19 years ago)

Author:

kjdon

Message:

got it sort of going for doc text. need to make it look at the index specification

File:

: 1 edited

trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java (modified) (9 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java

-              r8927
+              r8966
 package org.greenstone.gsdl3.gs3build.indexers;
+import org.greenstone.mgpp.MGPPPassesWrapper;
 import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
 import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
+import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocument;
+import org.greenstone.gsdl3.gs3build.doctypes.METSDocument;
+import org.greenstone.gsdl3.util.Misc;
+import org.greenstone.gsdl3.util.GSXML;
+import org.greenstone.gsdl3.util.Processing;
+import org.greenstone.gsdl3.gs3build.xpointer.XPointer;
+import org.greenstone.gsdl3.gs3build.metadata.*;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.io.File;
+import java.util.Iterator;
 import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Document;
 public class MGPPIndexer extends AbstractIndexer
 …
     int          pass;
     int          documentSeqNo;
+    int          sectionSeqNo;
     String       name;
     boolean      firstDocument;
+    File indexDirectory;
+    File textDirectory;
+    String       indexStem;
+    String       textStem;
+    StringBuffer indexBuffer;
     String       outputDirectory;
+    String       outputStem;
+    String       passExtra;
+    InputStream  indexerFeedback;
+    InputStream  indexerErrors;
+    OutputStream indexerTextfeed;
+    Process      mgpp_passes;
+    //String       outputStem;
+//     String       passExtra;
+//     InputStream   indexerFeedback;
+//     InputStream  indexerErrors;
+//     OutputStream indexerTextfeed;
+//     Process      mgpp_passes;
+    //String       overallName;
+    String       currentIndexName;
+    String       currentIndexLevel;
+       String       currentIndexField;
+    MGPPPassesWrapper mgppPasses;
     static final String documentSeparator = "<Document>";
     static final String sectionSeparator = "<Section>";
+    static final String START_OF_DOCUMENT = "<Document>";
+    static final String END_OF_DOCUMENT = "</Document>";
+    static final String START_OF_SECTION = "<Section>";
+    static final String END_OF_SECTION = "</Section>";
     public static final String MGPP_INDEX_TYPE = "mgpp";
     public static final String INDEX_FILE_STEM = "index";
+    class MGPPIndex
+    {
+    public String name = null;
+    public String doc_level = null;
+    public ArrayList levels = null;
+    public ArrayList fields = null;
+    boolean error = false;// assume built until we get an error
+    public MGPPIndex(String name) {
+        this.name = name;
+        doc_level = "Document";
+    }
+    public void setDocLevel(String doc_level) {
+        this.doc_level = doc_level;
+    }
+    public void addLevel(String level) {
+        this.levels.add(level);
+    }
+    // change to allow nested fields
+    public void addField(String field) {
+        this.fields.add(field);
+    }
+    public boolean hasError() {
+        return this.error;
+    }
+    public void setError(boolean b) {
+        this.error = b;
+    }
+    }
     public MGPPIndexer(String name)
+    {
 …
     public boolean configure(String label, String value)
+    {
     if (label.equals("outputDir")) {
+    if (label.equals(IndexerManager.outputDir)) {
         this.outputDirectory = value;
+        this.outputStem = value + "/index";
+        // attempt to ensure that the text subdirectory exists
+        this.textDirectory = new File(outputDirectory, "text");
+        if (!textDirectory.exists()) {
+        if (!textDirectory.mkdir()) {
+            return false;
+        }
+        }
+        else if (!textDirectory.isDirectory()) {
+        return false;
+        }
+        this.textStem = this.textDirectory.getPath() + File.separator + INDEX_FILE_STEM;
+        // attempt to ensure that the index subdir exists
+        this.indexDirectory = new File(outputDirectory, "idx");
+        if (!indexDirectory.exists()) {
+        if (!indexDirectory.mkdir()) {
+            return false;
+        }
+        }
+        else if (!indexDirectory.isDirectory()) {
+        return false;
+        }
+        this.indexStem = this.indexDirectory.getPath() + File.separator + INDEX_FILE_STEM;
+        // Sign to the user which mg directory is being used...
+        System.out.println("Output MGPP text directory is " + this.textStem);
+        System.out.println("Output MGPP index directory is " + this.indexStem);
+    }
     this.pass = 0;
 …
     public boolean addIndex(String name, String level, String field)
+    {
     if (level == "doc_level") {
         passExtra = " -J " + level;
+    }
     else {
         passExtra = " -K " + level;
+    }
+//  if (level == "doc_level") {
+//      passExtra = " -J " + level;
+//  }
+//  else {
+//      passExtra = " -K " + level;
+//  }
     return true;
+    }
 …
+    }
+    // why do this at the start and not at the end???
     if (!this.firstDocument) {
+        // Send a '<document>' before the document itself
+        try {
+        this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length);
+        }
+        catch (IOException ex) {
+        System.out.println("Bad output on end of document" + ex);
+        ex.printStackTrace();
+        return false;
+        }
+    }
+    String docText = document.getDocumentText();
+    int startSeqNo = this.documentSeqNo;
+    byte [] bytes = docText.getBytes();
+    int pos = 0, end = bytes.length;
+    try {
+        while (pos < end) {
+        this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
+        pos = pos + 512;
+        try {
+            while (this.indexerFeedback.available() > 0) {
+            byte b[] = new byte[this.indexerFeedback.available()];
+            System.out.println("Feedback of " + this.indexerFeedback.available());
+            this.indexerFeedback.read(b);
+            System.out.println(b);
+            }
+        }
+        catch (IOException ex) {
+        // Send a '</Document>' at the end of the doc
+        this.indexBuffer.append(END_OF_DOCUMENT);
+        mgppPasses.processDocument(indexBuffer.toString());
+        this.indexBuffer.delete(0, this.indexBuffer.length());
+    }
+    String docText = null;
+    int startSeqNo = this.sectionSeqNo;
+    this.sectionSeqNo ++;
+    Document domDocument = document.getDOMDocument();
+    if (domDocument != null) {
+        System.err.println("dom doc is not null");
+        METSStructure sections = document.getDocumentStructure().getStructure("Section");
+        if (sections != null) {
+        System.err.println("sections are not null");
+        docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field);
+        //  System.out.println(docText);
+        }
+    }
+    if (docText == null) {
+        System.err.println("dom doc or sections was null - asking for doc text");
+        //if (this.currentIndexField.equals("text")) {
+        //docText = Character.toString(END_OF_DOCUMENT) + document.getDocumentText();
+        docText = document.getDocumentText();
+        //}
+//      else {
+//      StringBuffer textBuffer = new StringBuffer();
+//      //textBuffer.append(END_OF_DOCUMENT);
+//      List values = document.getDocumentMetadataItem("gsdl3", this.currentIndexField);
+//      if (values != null) {
+//          Iterator valueIter = values.iterator();
+//          while (valueIter.hasNext()) {
+//          String value = valueIter.next().toString();
+//          textBuffer.append(value);
+//          if (valueIter.hasNext()) {
+//              //textBuffer.append(END_OF_SECTION);
+//              //        sectionSeqNo ++;
+//          }
+//          }
+//      }
+//      else {
+//          textBuffer.append("No data");
+//      }
+//      docText = textBuffer.toString();
+//      }
+        sectionSeqNo ++;
+    }
+        //try {
+        //  this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length);
+        // }
+//      catch (IOException ex) {
+//      System.out.println("Bad output on end of document" + ex);
+//      ex.printStackTrace();
+//      return false;
+//      }
+//  }
+    this.indexBuffer.append(START_OF_DOCUMENT);
+    //String docText = document.getDocumentText();
+    this.indexBuffer.append(docText);
+    //int startSeqNo = this.documentSeqNo;
+//  byte [] bytes = docText.getBytes();
+//  int pos = 0, end = bytes.length;
+//  try {
+//      while (pos < end) {
+//      this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
+//      pos = pos + 512;
+//      try {
+//          while (this.indexerFeedback.available() > 0) {
+//          byte b[] = new byte[this.indexerFeedback.available()];
+//          System.out.println("Feedback of " + this.indexerFeedback.available());
+//          this.indexerFeedback.read(b);
+//          System.out.println(b);
+//          }
+//      }
+//      catch (IOException ex) {
+        }
         try {
             while (this.indexerErrors.available() > 0) {
             byte b[] = new byte[this.indexerErrors.available()];
             System.out.println("Feedback of " + this.indexerErrors.available());
             this.indexerErrors.read(b);
             System.out.println(new String(b));
+            }
+        }
         catch (IOException ex){
+//      }
+//      try {
+//          while (this.indexerErrors.available() > 0) {
+//          byte b[] = new byte[this.indexerErrors.available()];
+//          System.out.println("Feedback of " + this.indexerErrors.available());
+//          this.indexerErrors.read(b);
+//          System.out.println(new String(b));
+//          }
+//      }
+//      catch (IOException ex){
+        }
+        }
+    }
     catch (IOException ex) {
         System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
         ex.printStackTrace();
         return false;
+    }
+//      }
+//      }
+//  }
+//  catch (IOException ex) {
+//      System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
+//      ex.printStackTrace();
+//      return false;
+//  }
     this.firstDocument = false;
 …
     this.documentSeqNo += 1;
     try {
         while (this.indexerErrors.available() > 0) {
         char c = (char) this.indexerErrors.read();
         System.out.println(c);
+        }
         while (this.indexerFeedback.available() > 0) {
         byte b[] = new byte[this.indexerFeedback.available()];
         System.out.println("Feedback of " + this.indexerFeedback.available());
         this.indexerFeedback.read(b);
+        }
+    }
     catch (IOException ex) {
+    }
+//  try {
+//      while (this.indexerErrors.available() > 0) {
+//      char c = (char) this.indexerErrors.read();
+//      System.out.println(c);
+//      }
+//      while (this.indexerFeedback.available() > 0) {
+//      byte b[] = new byte[this.indexerFeedback.available()];
+//      System.out.println("Feedback of " + this.indexerFeedback.available());
+//      this.indexerFeedback.read(b);
+//      }
+//  }
+//  catch (IOException ex) {
+//  }
     return true;
+    }
 …
     this.documentSeqNo = 1;
+    try {
+        switch (this.pass) {
+        case 0:
+        mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1");
+        break;
+        case 1:
+        mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2");
+        break;
+        case 2:
+        mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1");
+        break;
+        case 3:
+        Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem);
+        p.waitFor();
+        mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2");
+        break;
+        }
+        this.indexerFeedback = mgpp_passes.getInputStream();
+        this.indexerErrors   = mgpp_passes.getErrorStream();
+        this.indexerTextfeed = mgpp_passes.getOutputStream();
+    }
+    catch (IOException ex) {
+        System.out.println(ex);
+        ex.printStackTrace();
+        return false;
+    }
+    catch (InterruptedException ex) {
+        System.out.println(ex);
+        ex.printStackTrace();
+        return false;
+    }
+    this.mgppPasses = new MGPPPassesWrapper();
+    this.indexBuffer = new StringBuffer();
+    MGPPIndex index = null; // do something with this!!
+    // get the parameters for this execution of mg_passes
+    mgppPasses.setFileName((this.pass < 2 ? this.textStem : this.indexStem ));
+    if (!Misc.isWindows()) {
+        mgppPasses.setBasePath("/");
+    }
+    mgppPasses.setDocumentTag("Document");
+    //mgppPasses.addLevelTag("Section");
+    this.currentIndexLevel = "Document";// index.getLevel();
+    this.currentIndexField = "text";//index.getField();
+    this.currentIndexName = "idx"; //index.getName();
+    switch (this.pass) {
+    case 0:
+        // -T1
+        mgppPasses.addPass(MGPPPassesWrapper.TEXT_PASS_1);
+        //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1");
+        break;
+    case 1:
+        // -T2
+        mgppPasses.addPass(MGPPPassesWrapper.TEXT_PASS_2);
+        //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2");
+        break;
+    case 2:
+        // -I1
+        mgppPasses.addPass(MGPPPassesWrapper.INDEX_PASS_1);
+        //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1");
+        break;
+    case 3:
+        //Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem);
+        //p.waitFor();
+        // -I2
+        mgppPasses.addPass(MGPPPassesWrapper.INDEX_PASS_2);
+        //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2");
+        break;
+    }
+    //this.indexerFeedback = mgpp_passes.getInputStream();
+    //   this.indexerErrors   = mgpp_passes.getErrorStream();
+    //   this.indexerTextfeed = mgpp_passes.getOutputStream();
+    //  }
+    //catch (IOException ex) {
+    //   System.out.println(ex);
+    //   ex.printStackTrace();
+    //   return false;
+    //}/
+    //catch (InterruptedException ex) {
+    //   System.out.println(ex);
+    //   ex.printStackTrace();
+    //   return false;
+    //}
+    mgppPasses.init();
     System.out.println("Pass " + this.pass);
     return true;
 …
     // TODO: end pass
     Process p;
+    MGPPIndex index = null; // do something with this!!
     try {
+        this.indexerTextfeed.write((char) 2);
+        this.indexerTextfeed.write(4);
+        while (this.indexerErrors.available() > 0) {
+        char c = (char) this.indexerErrors.read();
+        System.out.print(c);
+        }
+        while (this.indexerFeedback.available() > 0) {
+        byte b[] = new byte[this.indexerFeedback.available()];
+        System.out.print("Feedback of " + this.indexerFeedback.available());
+        this.indexerFeedback.read(b);
+        }
+        this.indexerTextfeed.close();
+        Thread.sleep(1000);
+        this.mgpp_passes.waitFor();
+    }
+    catch (IOException ex) {
+        System.out.println(ex);
+        this.indexBuffer.append(END_OF_DOCUMENT);
+        mgppPasses.processDocument(indexBuffer.toString());
+        this.indexBuffer.delete(0, this.indexBuffer.length());
+        Thread.sleep(1000); // what for??
+    }
     catch (InterruptedException ex) {
         System.out.println(ex);
+    }
+    System.out.println("Completed with " + this.mgpp_passes.exitValue());
+    mgppPasses.finish();
     try {
+        switch (this.pass) {
+        case 0:
+        System.out.println("Compressing dictionary");
+        p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120");
+        p.waitFor();
+        System.out.println(p.exitValue());
+        break;
+        Thread.sleep(1000);
+    } catch (Exception e) {}
+    int exit_value = mgppPasses.exitValue();
+    System.out.println("Pass " + this.pass + " completed with " + exit_value);
+    if (exit_value !=0) {
+        //assume something has gone wrong, don't continue
+//      if (index != null) {
+//      index.setError(true);
+//      return false;
+//      }
+    }
+    String osextra = "";
+    if (!Misc.isWindows()) {
+        osextra = " -d / ";
+    }
+    switch (this.pass) {
+    case 0:
+        //System.exit(1);
+        System.out.println("Compressing dictionary");
+        exit_value = Processing.runProcess("mgpp_compression_dict -f " + this.textStem + " -S -H -2 -k 5120"+ osextra);
+        if (exit_value == 0) {
+        System.out.println("Compressed dictionary successfully written");
+        } else {
+        System.err.println("Error from mg_compression_dict: " + exit_value);
+        index.setError(true);
+        case 3:
+        p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem);
+        p.waitFor();
+        System.out.println(p.exitValue());
+        p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem);
+        p.waitFor();
+        System.out.println(p.exitValue());
+        p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory);
+        p.waitFor();
+        System.out.println(p.exitValue());
+        p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory);
+        p.waitFor();
+        System.out.println(p.exitValue());
+        p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory);
+        p.waitFor();
+        System.out.println(p.exitValue());
+        break;
+        }
+    }
+    catch (IOException ex) {
+        System.out.println(ex);
+        ex.printStackTrace();
+        return false;
+    }
+    catch (InterruptedException ex) {
+        System.out.println(ex);
+        ex.printStackTrace();
+        return false;
+    }
+        return false;
+        }
+        break;
+    case 2:
+        System.out.println("Creating perfect hash");
+        exit_value = Processing.runProcess("mgpp_perf_hash_build -f " + this.indexStem + osextra);
+        if (exit_value ==0) {
+        System.out.println("Perfect hashes completed");
+        } else {
+        System.err.println("Unable to build the perfect hash");
+        index.setError(true);
+        return false;
+        }
+        break;
+    case 3:
+        System.out.println("Writing weights file");
+        exit_value = Processing.runProcess("mgpp_weights_build -f " + this.indexStem + osextra);
+        if (exit_value ==0) {
+        System.out.println("Weights file successfully written");
+        } else {
+        System.err.println("Unable to create weights file");
+        index.setError(true);
+        return false;
+        }
+        System.out.println("Creating inverted dictionary");
+        exit_value = Processing.runProcess("mgpp_invf_dict -f " + this.indexStem + osextra);
+        if (exit_value ==0) {
+        System.out.println("Inverted dictionary file successfully written");
+        } else {
+        System.out.println("Unable to create inverted dictionary file");
+        index.setError(true);
+        return false;
+        }
+        System.out.println("Creating Stem indexes");
+        exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s1 -f " + this.indexStem +osextra);
+        if (exit_value == 0) {
+        System.out.println("Stemmed index 1 successfully written");
+        } else {
+        System.out.println("Unable to create stemmed index 1");
+        index.setError(true);
+        return false;
+        }
+        exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s2 -f " + this.indexStem + osextra);
+        if (exit_value == 0) {
+        System.out.println("Stemmed index 2 successfully written");
+        } else {
+        System.out.println("Unable to create stemmed index 2");
+        index.setError(true);
+        return false;
+        }
+        exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s3 -f " + this.indexStem + osextra);
+        if (exit_value == 0) {
+        System.out.println("Stemmed index 3 successfully written");
+        } else {
+        System.out.println("Unable to create stemmed index 3");
+        index.setError(true);
+        return false;
+        }
+        break;
+    } // switch
     return true;
+    }
 …
+    }
+    public boolean addServiceDescriptions(Element service_rack_list)
+    {
+    System.out.println("adding service description, MGPPIndexer");
+    public boolean addServiceDescriptions(Element service_rack_list) {
+    Document doc = service_rack_list.getOwnerDocument();
+    // generate the list of indexes
+    Element index_list = doc.createElement(GSXML.INDEX_ELEM+GSXML.LIST_MODIFIER);
+    Element e = doc.createElement(GSXML.INDEX_ELEM);
+    e.setAttribute(GSXML.NAME_ATT, "idx");
+    index_list.appendChild(e);
+    String def_index = "idx";
+//  boolean found_index = false;
+//  String def_index = ""; // the default index will just be the first one created for now.
+//  for (int i=0; i<this.indexes.size(); i++) {
+//      MGIndex index = (MGIndex)this.indexes.get(i);
+//      if (!index.hasError()) {
+//      Element e = doc.createElement(GSXML.INDEX_ELEM);
+//      e.setAttribute(GSXML.NAME_ATT, index.getName());
+//      index_list.appendChild(e);
+//      if (found_index == false) {
+//          // this is the first index
+//          found_index = true;
+//          def_index = index.getName();
+//      }
+//      }
+//  }
+//  if (!found_index) {
+//      // no indexes were able to be created, so we can't use them or the text
+//      return false;
+//  }
+    Element f = doc.createElement(GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER);
+    Element default_index = doc.createElement("defaultIndex");
+    default_index.setAttribute(GSXML.NAME_ATT, def_index);
+    Element base_index_name = doc.createElement("baseIndexPrefix");
+    base_index_name.setAttribute(GSXML.NAME_ATT, "index");  //overallName);
+    Element search_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
+    Element retrieve_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
+    Element default_level = doc.createElement("defaultLevel");
+    default_index.setAttribute(GSXML.NAME_ATT, "Document");
+    service_rack_list.appendChild(search_service_elem);
+    service_rack_list.appendChild(retrieve_service_elem);
+    search_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGPPSearch");
+    search_service_elem.appendChild(index_list);
+    search_service_elem.appendChild(default_index);
+    search_service_elem.appendChild(default_level);
+    search_service_elem.appendChild(base_index_name);
+    retrieve_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGPPRetrieve");
+    retrieve_service_elem.appendChild(default_level.cloneNode(true));
+    retrieve_service_elem.appendChild(base_index_name.cloneNode(true));
     return true;
+    }
+  /**
+   * Processes one item of the document structure in step with the document's
+   * DOM: assigns a sequence number to each division, appends the text (or the
+   * metadata values of the current index field) to <code>textBuffer</code>,
+   * and recurses into the item's children.
+   *
+   * @param metsDoc     the document being indexed; divisions get a "gsdl3"
+   *                    "mgseqno" metadata value and the document is marked changed
+   * @param node        the DOM node at which text output should resume
+   * @param structure   the structure item (a division or the whole structure) to process
+   * @param textBuffer  receives the text to be indexed
+   * @param extraBuffer receives the END_OF_DOCUMENT markers that are held back
+   *                    when indexing at document level (or with no level set)
+   * @param namespace   metadata namespace read when the index field is not "text"
+   * @return the DOM node at which the caller should continue; null once a
+   *         whole structure has been written out
+   */
+  private Node recurseDOM(DocumentInterface metsDoc, Node node,
+              AbstractStructure structure, StringBuffer textBuffer,
+              StringBuffer extraBuffer, String namespace)
+              //String name, String namespace, String field)
+  {
+    if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) {
+      // try doing this for all index types
+      if (this.currentIndexName != null) {
+        METSDivision division = (METSDivision) structure;
+        // get the division metadata block, creating one if the division has none
+        METSDescriptive descriptive;
+        String metadataId = division.getDefaultMetadataReference();
+        if (metadataId == null) {
+          descriptive = metsDoc.getDocumentMetadata().createDescriptive(division.getLabel());
+          division.addMetadataReference(descriptive.getID());
+        }
+        else {
+          // Get the descriptive item...
+          descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
+        }
+        // record the sequence number this division has in this indexer's output
+        descriptive.addMetadata("gsdl3", "mgseqno", this.name + "." + Integer.toString(this.sectionSeqNo));
+        metsDoc.setChanged(true);
+      } // section level
+      // no 'end of section' marker is written at present; only the counter moves on
+      this.sectionSeqNo ++;
+      // for document-level indexes, always append an 'end of document' tag at the
+      // end of the document for each section.  Otherwise, each section is followed
+      // by an end of document tag.  This ensures that all indexes use the
+      // same document numbering...
+      if (this.currentIndexLevel == null ||
+          this.currentIndexLevel.equals(IndexerInterface.DOCUMENT_LEVEL)) {
+        extraBuffer.append(END_OF_DOCUMENT);
+      }
+      else {
+        textBuffer.append(END_OF_DOCUMENT);
+        this.documentSeqNo ++;
+      }
+      // produce the body here for metadata output of divisions - in the case of
+      // text output, that will happen below...
+      if (!this.currentIndexField.equals("text")) {
+        METSDescriptive descriptive;
+        METSDivision division = (METSDivision) structure;
+        String metadataId = division.getDefaultMetadataReference();
+        descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
+        if (descriptive != null) {
+          List values = descriptive.getMetadata(namespace, this.currentIndexField);
+          if (values != null) {
+            Iterator valueIter = values.iterator();
+            while (valueIter.hasNext()) {
+              String value = valueIter.next().toString();
+              textBuffer.append(value);
+            }
+          }
+        }
+      }
+    }
+    // go through our children as required...
+    Iterator children = structure.getChildIterator();
+    Node startNode;
+    while (children.hasNext()) {
+      AbstractStructure child = (AbstractStructure) children.next();
+      // get start position node
+      // equals(), not ==: == compares references, so a "METS" string that is
+      // not the same object as the literal would wrongly take the HTML branch
+      if ("METS".equals(metsDoc.getDocumentType())) {
+        startNode = ((METSDocument) metsDoc).getSectionStartNode((METSDivision) child);
+      } else {
+        startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child);
+      }
+      // while this node isn't the child's start node, produce the HTML node text, if
+      // in text field mode...
+      if (this.currentIndexField.equals("text")) {
+        // NOTE(review): assumes startNode is reachable from node through
+        // XPointer.getNextNode; there is no guard for running off the end - confirm
+        while (node != startNode) {
+          XPointer.printNode(node, textBuffer, false);
+          // print buffer to node
+          node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
+        }
+      }
+      // recurse to child
+      node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // name, namespace, field);
+    } // while next child
+    // close a document - at the top of the structure, write out whatever
+    // nodes remain after the last child
+    if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) {
+      while (node != null) {
+        if (this.currentIndexField.equals("text")) {
+          XPointer.printNode(node, textBuffer, false);
+        }
+        node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
+      }
+      this.sectionSeqNo ++;
+    }
+    return node;
+  }
+    /**
+     * Builds the indexer input for one document by running recurseDOM from
+     * the root element of its DOM over the given structure.
+     *
+     * @param metsDoc   the document being indexed
+     * @param document  the DOM of that document
+     * @param structure the structure to walk
+     * @param namespace metadata namespace handed through to recurseDOM
+     * @return the text collected by recurseDOM followed by the content of its extra buffer
+     */
+    private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace)
+    {
+        StringBuffer deferred = new StringBuffer();
+        Node root = document.getDocumentElement();
+        StringBuffer collected = new StringBuffer();
+        this.recurseDOM(metsDoc, root, structure, collected, deferred, namespace);
+        // the deferred markers always go after the collected text
+        String tail = deferred.toString();
+        collected.append(tail);
+        return collected.toString();
+    }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 8966

Legend:

trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java

Download in other formats: