Ignore:
Timestamp:
2006-07-13T10:29:55+12:00 (18 years ago)
Author:
kjdon
Message:

Committed some changes that I had made ages ago. Not sure if it still compiles - I need to write an Ant build file for this and check compilation. Will do it once I need to - it's unclear whether anyone will ever use this again.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java

    r12188 r12191  
    1818import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
    1919import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
     20import org.greenstone.gsdl3.gs3build.doctypes.AbstractDocument;
    2021import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocument;
    2122import org.greenstone.gsdl3.gs3build.doctypes.METSDocument;
     
    2425import org.greenstone.gsdl3.gs3build.util.DOMUtils;
    2526import org.greenstone.gsdl3.util.GSXML;
     27import org.greenstone.gsdl3.util.GSFile;
    2628import org.greenstone.gsdl3.util.Misc;
    2729import org.greenstone.gsdl3.util.Processing;
     
    293295        //metsDoc.setModified(true);
    294296        //  System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel());
    295         } // section level
     297        } // first pass
    296298   
    297299        // append an 'end of section' marker
     
    408410    public boolean indexDocument(DocumentID docID, DocumentInterface document)
    409411    {
    410    
    411     if (!this.firstDocument) {
    412         this.indexBuffer.append(END_OF_DOCUMENT);
    413         mgPasses.processDocument(indexBuffer.toString());
    414         this.indexBuffer.delete(0, this.indexBuffer.length());
    415        
    416     }
    417 
     412    int count = ((AbstractDocument)document).getNumSections();
    418413    String docText = null;
    419414    // set the mgseqno if first pass
     
    426421   
    427422    //long start = System.currentTimeMillis();
    428     Document domDocument = document.getDOMDocument();
    429     if (domDocument != null) {
    430         System.err.println("dom doc is not null");
    431         METSStructure sections = document.getDocumentStructure().getStructure("Section");
    432         if (sections != null) {
    433         System.err.println("sections are not null");
    434         docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field);
    435         //  System.out.println(docText);
    436         }
    437     }
     423    if (this.current_index.getLevel().equals("section")) {
     424
     425        Document domDocument = document.getDOMDocument();
     426        if (domDocument != null) {
     427        System.err.println("dom doc is not null");
     428        METSStructure sections = document.getDocumentStructure().getStructure("Section");
     429        if (sections != null) {
     430            System.err.println("sections are not null");
     431            docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field);
     432            //  System.out.println(docText);
     433        }
     434        }
     435    }
     436       
     437   
    438438    //long finish = System.currentTimeMillis();
    439439    //System.err.println("dom doc = "+ Long.toString(finish-start));
     
    447447        if (field.equals("text")) {
    448448            doc_text_buffer.append(document.getDocumentText());
     449            doc_text_buffer.append(" ");
    449450        }  else {
    450451            // its a metadata - do namespace properly!!
     
    455456                String value = valueIter.next().toString();
    456457                doc_text_buffer.append(value);
     458                doc_text_buffer.append(" ");
    457459            }
    458460            }
     
    461463        docText = doc_text_buffer.toString();
    462464        sectionSeqNo ++;
     465        int num_secs = 0;
    463466    }
    464467    //finish = System.currentTimeMillis();
     
    467470    this.indexBuffer.append(docText);
    468471    // remember that we're not on the first document,
    469     this.firstDocument = false;
     472    //this.firstDocument = false;
    470473    this.documentSeqNo ++;
     474    //if (!this.firstDocument) {
     475    this.indexBuffer.append(END_OF_DOCUMENT);
     476    mgPasses.processDocument(indexBuffer.toString());
     477    String filename="";
     478    try {
     479        filename = "pass"+this.pass+"doc"+this.documentSeqNo+".txt";
     480        System.err.println("trying to write to "+filename);
     481        GSFile.writeFile(indexBuffer.toString().getBytes(), filename);
     482    } catch (Exception e) {
     483        System.err.println("COUldn't write to file, "+filename);
     484    }
     485    this.indexBuffer.delete(0, this.indexBuffer.length());
     486       
     487   
    471488
    472489    return true;
     
    487504    this.indexBuffer = new StringBuffer();
    488505    int indexNo = this.pass/2;
    489     this.current_index = null;
    490506   
    491507    this.current_index = (MGIndex) this.indexes.get(indexNo);
     
    512528        this.textStem = this.indexStem;
    513529    }
     530   
    514531    mgPasses.setFileName(this.indexStem);
    515532    if (!Misc.isWindows()) {
     
    573590    } catch (Exception e) {}
    574591   
    575     int exit_value = 0;
     592    int exit_value = mgPasses.exitValue();
    576593    System.out.println("Pass " + this.pass + " completed with " + exit_value);
    577594    if (exit_value !=0) {
Note: See TracChangeset for help on using the changeset viewer.