Changeset 8440 for trunk/gsdl3/src


Ignore:
Timestamp:
2004-11-03T15:14:05+13:00 (20 years ago)
Author:
kjdon
Message:

Modified how the external processes are run: the Runtime.exec() calls now go through the util/Processing class, so they won't hang when the output buffers are full. Also removed one unnecessary (I think) END_OF_DOCUMENT marker that was getting added to the document text.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java

    r8408 r8440  
    2323import org.greenstone.gsdl3.util.GSXML;
    2424import org.greenstone.gsdl3.util.Misc;
     25import org.greenstone.gsdl3.util.Processing;
    2526
    2627public class MGIndexer extends AbstractIndexer
     
    342343   
    343344    if (!this.firstDocument) {
    344         // Send a 'CTRL-B' before the document itself
    345         // try {
    346         //this.indexerTextfeed.write(END_OF_DOCUMENT);
    347345        this.indexBuffer.append(END_OF_DOCUMENT);
    348346        mgPasses.processDocument(indexBuffer.toString());
     
    350348       
    351349    }
    352         //   }
    353         //  catch (IOException ex)
    354         //  { System.out.println("Bad output on end of document" + ex);
    355       //    ex.printStackTrace();
    356       //    return false;
    357       //  }
    358    
    359350
    360351    String docText = null;
     
    374365    }
    375366    if (docText == null) {
    376         System.err.println("doc text is null");
     367        System.err.println("dom doc or sections was null - asking for doc text");
    377368        if (this.currentIndexField.equals("text")) {
    378         docText = Character.toString(END_OF_DOCUMENT) /*+ Character.toString(END_OF_SECTION)*/ + document.getDocumentText();
    379         System.err.println("prepending EOD to doctext");
    380    
     369        //docText = Character.toString(END_OF_DOCUMENT) + document.getDocumentText();
     370        docText = document.getDocumentText();
    381371        }
    382372        else {
    383373        StringBuffer textBuffer = new StringBuffer();
    384         textBuffer.append(END_OF_DOCUMENT);
    385         System.err.println("* appending EOD to text");
    386    
    387         //textBuffer.append(END_OF_SECTION);
     374        //textBuffer.append(END_OF_DOCUMENT);
    388375        List values = document.getDocumentMetadataItem("gsdl3", this.currentIndexField);
    389376        if (values != null) {
     
    409396   
    410397    this.indexBuffer.append(docText);
    411     //byte [] bytes = docText.getBytes();
    412     //int pos = 0, end = bytes.length;
    413    
    414     /*
    415       try {
    416       while (pos < end) {
    417       //this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
    418       this.indexBuffer.append((char [])bytes, pos, (end - pos > 512 ? 512 : end - pos));
    419     pos = pos + 512;
    420      
    421     try {
    422       while (this.indexerFeedback.available() > 0)
    423           { byte b[] = new byte[this.indexerFeedback.available()];
    424         System.out.println("Feedback of " + this.indexerFeedback.available());
    425         this.indexerFeedback.read(b);
    426         System.out.println(b);
    427       }
    428     }
    429     catch (IOException ex)
    430         { System.out.println(ex);
    431     }
    432 
    433 
    434     try {
    435       while (this.indexerErrors.available() > 0)
    436       { byte b[] = new byte[this.indexerErrors.available()];
    437         System.out.println("Feedback of " + this.indexerErrors.available());
    438         this.indexerErrors.read(b);
    439         System.out.println(new String(b));
    440       }
    441     }
    442     catch (IOException ex)
    443         { System.out.println(ex);
    444     }
    445       }
    446     }
    447     catch (IOException ex)
    448     { System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
    449       ex.printStackTrace();
    450       return false;
    451     }
    452     */
    453398    // remember that we're not on the first document,
    454399    this.firstDocument = false;
    455400    // assign the sequence number on the first pass only, and increment the sequence number.
    456401    if (this.pass == 0) {
    457     //document.addDocumentMetadata("gsdl3", "mgseqno", "dtx."+Integer.toString(startSeqNo));
    458402    document.addDocumentMetadata("gsdl3", "mgseqno", this.overallName+"."+Integer.toString(startSeqNo));
    459       //System.out.println("Assigning " + startSeqNo + " to " + document.getID());
    460403    }
    461404    this.documentSeqNo += 1;
    462405
    463     //    try {
    464 //       while (this.indexerErrors.available() > 0)
    465 //       { char c = (char) this.indexerErrors.read();
    466 //         System.out.println(c);
    467 //       }
    468 //       while (this.indexerFeedback.available() > 0)
    469 //       { byte b[] = new byte[this.indexerFeedback.available()];
    470 //         System.out.println("Feedback of " + this.indexerFeedback.available());
    471 //  this.indexerFeedback.read(b);
    472 //       }
    473 //    }
    474 //     catch (IOException ex)
    475 //     {
    476 //     }
    477406    return true;
    478407  }
     
    481410     *  Initialise the pass: open required files, check status
    482411     */
    483     public boolean startPass(int passNumber)
    484     {   
    485    
    486       this.pass = passNumber;
    487       this.firstDocument = true;
    488       this.documentSeqNo = 1;
    489       this.sectionSeqNo  = 1;
    490 
    491       this.mgPasses = new MGPassesWrapper();
    492       this.indexBuffer = new StringBuffer();
    493       int indexNo = (this.pass - 2) / 2;
    494       MGIndex index = null;
    495       if (this.pass >= 2) {
    496       index = (MGIndex) this.indexes.get(indexNo);
    497       if (index.hasError()) {
    498           // an error has already occurred for this index, don't continue
    499           System.out.println("pass "+this.pass+": aborted due to errors in the previous pass");
    500           return false;
    501       }
    502       // attempt to ensure that the text subdirectory exists
    503       //this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField()));
    504       this.indexDirectory = new File(outputDirectory, index.getName());
    505       if (!indexDirectory.exists()) {
    506           if (!indexDirectory.mkdir()) {
    507           return false;
    508           }
    509       }
    510       else if (!indexDirectory.isDirectory()) {
    511           return false;
    512       }
    513 
    514       this.currentIndexLevel = index.getLevel();
    515       this.currentIndexField = index.getField();
    516       this.currentIndexName = index.getName();
    517      
    518     if (this.currentIndexLevel == null || this.currentIndexField == null ) {
    519         System.out.println("invalid index - level or field was null");
    520         return false;
    521     }
    522     //if (this.currentIndexName == null || this.currentIndexName.length() == 0) {
    523     // this.currentIndexName = getIndexDirectory(index.getLevel(), index.getField());
    524     //  }
    525     this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM;  // TODO: modify for index
    526     if (this.pass % 2 == 1) {
    527         this.currentIndexName = null; // why???
    528     }
    529       }
    530       else {
    531      
    532       this.currentIndexField = "text";
    533       this.currentIndexLevel = "section";
    534       this.currentIndexName = null;
    535       }
    536      
    537       // get the parameters for this execution of mg_passes
    538       //String pathParams = "-f index -d " + (this.pass < 2 ? this.textDirectory.toString() : this.indexDirectory.toString());
    539       mgPasses.setFileName((this.pass < 2 ? this.textDirectory.toString() : this.indexDirectory.toString())+File.separator+ "index");
    540       if (!Misc.isWindows()) {
    541       mgPasses.setBasePath("/");
    542       }
    543       int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
    544      
    545       mgPasses.setBufferSize(100000);
    546       // try {
    547       // TODO add the other options to mg passes
     412    public boolean startPass(int passNumber) {
     413       
     414   
     415    this.pass = passNumber;
     416    this.firstDocument = true;
     417    this.documentSeqNo = 1;
     418    this.sectionSeqNo  = 1;
     419   
     420    this.mgPasses = new MGPassesWrapper();
     421    this.indexBuffer = new StringBuffer();
     422    int indexNo = (this.pass - 2) / 2;
     423    MGIndex index = null;
     424    if (this.pass >= 2) {
     425        index = (MGIndex) this.indexes.get(indexNo);
     426        if (index.hasError()) {
     427        // an error has already occurred for this index, don't continue
     428        System.out.println("pass "+this.pass+": aborted due to errors in the previous pass");
     429        return false;
     430        }
     431        // attempt to ensure that the text subdirectory exists
     432        this.indexDirectory = new File(outputDirectory, index.getName());
     433        if (!indexDirectory.exists()) {
     434        if (!indexDirectory.mkdir()) {
     435            return false;
     436        }
     437        }
     438        else if (!indexDirectory.isDirectory()) {
     439        return false;
     440        }
     441       
     442        this.currentIndexLevel = index.getLevel();
     443        this.currentIndexField = index.getField();
     444        this.currentIndexName = index.getName();
     445       
     446        if (this.currentIndexLevel == null || this.currentIndexField == null ) {
     447        System.out.println("invalid index - level or field was null");
     448        return false;
     449        }
     450        this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM;  // TODO: modify for index
     451        if (this.pass % 2 == 1) {
     452        this.currentIndexName = null; // why???
     453        }
     454    }
     455    else {
     456       
     457        this.currentIndexField = "text";
     458        this.currentIndexLevel = "section";
     459        this.currentIndexName = null;
     460    }
     461   
     462    // get the parameters for this execution of mg_passes
     463    mgPasses.setFileName((this.pass < 2 ? this.textDirectory.toString() : this.indexDirectory.toString())+File.separator+ "index");
     464    if (!Misc.isWindows()) {
     465        mgPasses.setBasePath("/");
     466    }
     467    int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
     468   
     469    mgPasses.setBufferSize(100000);
     470   
    548471    switch (mgPass) {
    549472    case 0:
    550           //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams + " -b 100000 -T1");
     473        // -b 100000 -T1
    551474        mgPasses.addPass(MGPassesWrapper.TEXT_PASS_1);
    552475       
    553476       
    554477        break;
    555 
    556       case 1:
    557           //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -T2");
    558           mgPasses.addPass(MGPassesWrapper.TEXT_PASS_2);
     478       
     479    case 1:
     480        // -b 100000 -T2
     481        mgPasses.addPass(MGPassesWrapper.TEXT_PASS_2);
    559482        break;
    560483       
    561       case 2:
    562           //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams + " -b 100000 -2 -m 32 -s 0 -G -t 10 -N1");
     484    case 2:
     485        // -b 100000 -2 -m 32 -s 0 -G -t 10 -N1
    563486        mgPasses.addPass(MGPassesWrapper.INDEX_PASS_1);
    564487        mgPasses.setInvfLevel(MGPassesWrapper.INVF_LEVEL_2);
     
    568491        break;
    569492       
    570       case 3:
    571           //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -2 -c 3 -G -t 10 -N2");
     493    case 3:
     494        // -b 100000 -2 -c 3 -G -t 10 -N2
    572495        mgPasses.addPass(MGPassesWrapper.INDEX_PASS_2);
    573496        mgPasses.setInvfLevel(MGPassesWrapper.INVF_LEVEL_2);
     
    575498        break;
    576499    }
    577        
     500   
    578501    mgPasses.init();
    579     //  this.indexerFeedback = mg_passes.getInputStream();
    580     //this.indexerErrors   = mg_passes.getErrorStream();
    581     //this.indexerTextfeed = mg_passes.getOutputStream();
    582     // }
    583     //      catch (IOException ex)
    584     // { System.out.println(ex);
    585     // ex.printStackTrace();
    586     //  index.setError(true);
    587     //  return false;
    588     // }   
    589       //        catch (InterruptedException ex)
    590 //        { System.out.println(ex);
    591 //          ex.printStackTrace();
    592 //      index.setError(true);
    593 //      return false;
    594 //        }
    595       System.out.println("Pass " + this.pass);
    596       return true;
     502    System.out.println("Pass " + this.pass);
     503    return true;
    597504    }
    598505 
    599     public void printProcessOutput(Process p)
    600     throws IOException {
    601     BufferedReader error_stream = new BufferedReader(new InputStreamReader( p.getErrorStream(), "UTF-8" ));
    602     BufferedReader output_stream = new BufferedReader(new InputStreamReader( p.getInputStream(), "UTF-8" ));
    603     while (output_stream.ready()) {
    604         System.err.println("out> "+output_stream.readLine());
    605     }
    606     while (error_stream.ready()) {
    607         System.err.println("err> "+error_stream.readLine());
    608     }
    609    
    610     }
    611506    /**
    612507     *  Complete a pass - reset file counters, close files, etc.
    613508     */
    614     public boolean endPass(int passNumber)
    615     { Process p;
     509    public boolean endPass(int passNumber) {
     510    Process p;
    616511   
    617512    int indexNo = (passNumber - 2) / 2;
    618513    MGIndex index = null;
    619     if (passNumber >= 2) {
    620     index = (MGIndex) this.indexes.get(indexNo);
    621     }
    622     try {
    623     //this.indexerTextfeed.write(END_OF_DOCUMENT);
    624     //this.indexerTextfeed.write(END_OF_STREAM);
    625     this.indexBuffer.append(END_OF_DOCUMENT);
    626     mgPasses.processDocument(indexBuffer.toString());
    627     this.indexBuffer.delete(0, this.indexBuffer.length());
    628 //  while (this.indexerErrors.available() > 0)
    629 //  { char c = (char) this.indexerErrors.read();
    630 //    System.out.print(c);
    631 //  }
    632 //  while (this.indexerFeedback.available() > 0)
    633 //  { byte b[] = new byte[this.indexerFeedback.available()];
    634 //    System.out.print("Feedback of " + this.indexerFeedback.available());
    635 //    this.indexerFeedback.read(b);
    636 //  }
    637 
    638     //this.indexerTextfeed.close();
    639     Thread.sleep(1000);
    640     //this.mg_passes.waitFor();
    641     }
    642 //     catch (IOException ex)
    643 //       { System.out.println(ex);
    644 //       }
    645       catch (InterruptedException ex)
    646       { System.out.println(ex);
    647       }
    648     //    int exitValue = this.mg_passes.exitValue();
    649     mgPasses.finish();
    650     try {
    651     Thread.sleep(1000);
    652     } catch (Exception e) {}
    653     int exitValue = 0;
    654     System.out.println("Pass " + this.pass + " completed with " + exitValue);
    655       if (exitValue !=0) {
    656       //assume something has gone wrong, don't continue
    657       if (index != null) {
    658           index.setError(true);
    659           return false;
    660       }
    661       }
    662       int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
    663       String osextra = "";
    664       if (!Misc.isWindows()) {
    665       osextra = " -d / ";
    666       }
    667       try {
    668     switch (mgPass)
    669     {
    670       case 0:
     514    if (passNumber >= 2) {
     515        index = (MGIndex) this.indexes.get(indexNo);
     516    }
     517    try {
     518        this.indexBuffer.append(END_OF_DOCUMENT);
     519        mgPasses.processDocument(indexBuffer.toString());
     520        this.indexBuffer.delete(0, this.indexBuffer.length());
     521        Thread.sleep(1000); // what for??
     522    }
     523    catch (InterruptedException ex) {
     524        System.out.println(ex);
     525    }
     526    mgPasses.finish();
     527    try {
     528        Thread.sleep(1000);
     529    } catch (Exception e) {}
     530   
     531    int exit_value = 0;
     532    System.out.println("Pass " + this.pass + " completed with " + exit_value);
     533    if (exit_value !=0) {
     534        //assume something has gone wrong, don't continue
     535        if (index != null) {
     536        index.setError(true);
     537        return false;
     538        }
     539    }
     540    int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
     541    String osextra = "";
     542    if (!Misc.isWindows()) {
     543        osextra = " -d / ";
     544    }
     545
     546    switch (mgPass) {
     547       
     548    case 0:
    671549        System.out.println("Compressing dictionary");
    672         p = Runtime.getRuntime().exec("mg_compression_dict -f " + this.textDirectory.toString()+File.separator+"index" + osextra + " -S -H -2 -k 5120");
    673         p.waitFor();
    674         printProcessOutput(p);
    675         if (p.exitValue() != 0) {
    676           System.out.println("Error from mg_compression_dict: " + p.exitValue());
    677           index.setError(true);
    678          
    679           return false;
    680         }
    681         else {
    682           System.out.println("Compressed dictionary successfully written");
    683         }
    684       break;
    685 
     550        exit_value = Processing.runProcess("mg_compression_dict -f " + this.textDirectory.toString()+File.separator+"index" + osextra + " -S -H -2 -k 5120");
     551        if (exit_value == 0) {
     552        System.out.println("Compressed dictionary successfully written");
     553        } else {
     554        System.err.println("Error from mg_compression_dict: " + exit_value);
     555        index.setError(true);
     556       
     557        return false;
     558        }
     559        break;
     560       
    686561    case 2:
    687562        System.out.println("Creating perfect hash");
    688         p = Runtime.getRuntime().exec("mg_perf_hash_build -f " + this.indexDirectory.toString()+File.separator+ "index"+osextra);
    689         p.waitFor();
    690         printProcessOutput(p);
    691         if (p.exitValue() == 0) {
    692           System.out.println("Perfect hashes completed");
     563        exit_value = Processing.runProcess("mg_perf_hash_build -f " + this.indexDirectory.toString()+File.separator+ "index"+osextra);
     564        if (exit_value ==0) {
     565        System.out.println("Perfect hashes completed");
    693566        } else {
    694         System.out.println("Unable to build the perfect hash");
     567        System.err.println("Unable to build the perfect hash");
    695568        index.setError(true);
    696569        return false;
    697570        }
    698571        break;
    699 
    700       case 3:
     572       
     573    case 3:
    701574        System.out.println("Writing weights file");
    702         p = Runtime.getRuntime().exec("mg_weights_build -f " + this.indexStem + " -t " + this.textStem + osextra);
    703         p.waitFor();
    704         printProcessOutput(p);
    705         if (p.exitValue() == 0) {
    706           System.out.println("Weights file successfully written");
    707         }
    708         else {
    709           System.out.println("Unable to create weights file");
    710           index.setError(true);
    711           return false;
    712 
    713         }
    714 
    715         p = Runtime.getRuntime().exec("mg_invf_dict -f " + this.indexDirectory.toString()+File.separator+"index" + osextra);
    716         p.waitFor();
    717         printProcessOutput(p);
    718         if (p.exitValue() == 0) {
    719           System.out.println("Inverted dictionary file successfully written");
    720         }
    721         else {
    722           System.out.println("Unable to create inverted dictionary file");
    723           index.setError(true);
    724           return false;
    725 
    726         }
    727        
    728         p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s1 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
    729         p.waitFor();
    730         printProcessOutput(p);
    731         if (p.exitValue() == 0) {
    732           System.out.println("Stemmed index 1 successfully written");
    733         }
    734         else {
    735           System.out.println("Unable to create stemmed index 1");
    736           index.setError(true);
    737           return false;
    738 
    739         }
    740 
    741         p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s2 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
    742         p.waitFor();
    743         printProcessOutput(p);
    744         if (p.exitValue() == 0) {
    745           System.out.println("Stemmed index 2 successfully written");
    746         }
    747         else {
    748           System.out.println("Unable to create stemmed index 2");
    749           index.setError(true);
    750           return false;
    751         }
    752 
    753         p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s3 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
    754         p.waitFor();
    755         printProcessOutput(p);
    756         if (p.exitValue() == 0) {
    757           System.out.println("Stemmed index 3 successfully written");
    758         }
    759         else {
    760           System.out.println("Unable to create stemmed index 3");
    761           index.setError(true);
    762           return false;
    763         }
    764       break;
    765     }
    766       }
    767       catch (IOException ex)
    768       { System.out.println(ex);
    769         ex.printStackTrace();
    770     index.setError(true);
    771     return false;
    772       }
    773       catch (InterruptedException ex)
    774       { System.out.println(ex);
    775         ex.printStackTrace();
    776     index.setError(true);
    777     return false;
    778       }
    779       mgPasses = null;
    780       return true;
    781     }
    782 
     575        exit_value = Processing.runProcess("mg_weights_build -f " + this.indexStem + " -t " + this.textStem + osextra);
     576        if (exit_value ==0) {
     577        System.out.println("Weights file successfully written");
     578        } else {
     579        System.err.println("Unable to create weights file");
     580        index.setError(true);
     581        return false;
     582        }
     583       
     584        System.out.println("Creating inverted dictionary");
     585        exit_value = Processing.runProcess("mg_invf_dict -f " + this.indexDirectory.toString()+File.separator+"index" + osextra);
     586        if (exit_value ==0) {
     587        System.out.println("Inverted dictionary file successfully written");
     588        } else {
     589        System.out.println("Unable to create inverted dictionary file");
     590        index.setError(true);
     591        return false;
     592        }
     593       
     594        System.out.println("Creating Stem indexes");
     595        exit_value = Processing.runProcess("mg_stem_idx -b 4096 -s1 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
     596        if (exit_value == 0) {
     597        System.out.println("Stemmed index 1 successfully written");
     598        } else {
     599        System.out.println("Unable to create stemmed index 1");
     600        index.setError(true);
     601        return false;
     602        }
     603       
     604        exit_value = Processing.runProcess("mg_stem_idx -b 4096 -s2 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
     605        if (exit_value == 0) {
     606        System.out.println("Stemmed index 2 successfully written");
     607        } else {
     608        System.out.println("Unable to create stemmed index 2");
     609        index.setError(true);
     610        return false;
     611        }
     612        exit_value = Processing.runProcess("mg_stem_idx -b 4096 -s3 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
     613        if (exit_value == 0) {
     614        System.out.println("Stemmed index 3 successfully written");
     615        } else {
     616        System.out.println("Unable to create stemmed index 3");
     617        index.setError(true);
     618        return false;
     619        }
     620       
     621        break;
     622    } // switch
     623
     624    mgPasses = null;
     625    return true;
     626    }
     627   
    783628    /**
    784629     *  Do any tidying up
     
    796641
    797642    public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) {
    798     System.out.println("adding service description, MGIndexer");
    799643    Document doc = service_rack_list.getOwnerDocument();
    800644
Note: See TracChangeset for help on using the changeset viewer.