Changeset 12191 for trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
- Timestamp: 2006-07-13T10:29:55+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
r12188 r12191 18 18 import org.greenstone.gsdl3.gs3build.doctypes.DocumentID; 19 19 import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface; 20 import org.greenstone.gsdl3.gs3build.doctypes.AbstractDocument; 20 21 import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocument; 21 22 import org.greenstone.gsdl3.gs3build.doctypes.METSDocument; … … 24 25 import org.greenstone.gsdl3.gs3build.util.DOMUtils; 25 26 import org.greenstone.gsdl3.util.GSXML; 27 import org.greenstone.gsdl3.util.GSFile; 26 28 import org.greenstone.gsdl3.util.Misc; 27 29 import org.greenstone.gsdl3.util.Processing; … … 293 295 //metsDoc.setModified(true); 294 296 // System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel()); 295 } // section level297 } // first pass 296 298 297 299 // append an 'end of section' marker … … 408 410 public boolean indexDocument(DocumentID docID, DocumentInterface document) 409 411 { 410 411 if (!this.firstDocument) { 412 this.indexBuffer.append(END_OF_DOCUMENT); 413 mgPasses.processDocument(indexBuffer.toString()); 414 this.indexBuffer.delete(0, this.indexBuffer.length()); 415 416 } 417 412 int count = ((AbstractDocument)document).getNumSections(); 418 413 String docText = null; 419 414 // set the mgseqno if first pass … … 426 421 427 422 //long start = System.currentTimeMillis(); 428 Document domDocument = document.getDOMDocument(); 429 if (domDocument != null) { 430 System.err.println("dom doc is not null"); 431 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 432 if (sections != null) { 433 System.err.println("sections are not null"); 434 docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field); 435 // System.out.println(docText); 436 } 437 } 423 if (this.current_index.getLevel().equals("section")) { 424 425 Document domDocument = document.getDOMDocument(); 426 if (domDocument != null) { 427 System.err.println("dom doc is not 
null"); 428 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 429 if (sections != null) { 430 System.err.println("sections are not null"); 431 docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field); 432 // System.out.println(docText); 433 } 434 } 435 } 436 437 438 438 //long finish = System.currentTimeMillis(); 439 439 //System.err.println("dom doc = "+ Long.toString(finish-start)); … … 447 447 if (field.equals("text")) { 448 448 doc_text_buffer.append(document.getDocumentText()); 449 doc_text_buffer.append(" "); 449 450 } else { 450 451 // its a metadata - do namespace properly!! … … 455 456 String value = valueIter.next().toString(); 456 457 doc_text_buffer.append(value); 458 doc_text_buffer.append(" "); 457 459 } 458 460 } … … 461 463 docText = doc_text_buffer.toString(); 462 464 sectionSeqNo ++; 465 int num_secs = 0; 463 466 } 464 467 //finish = System.currentTimeMillis(); … … 467 470 this.indexBuffer.append(docText); 468 471 // remember that we're not on the first document, 469 this.firstDocument = false;472 //this.firstDocument = false; 470 473 this.documentSeqNo ++; 474 //if (!this.firstDocument) { 475 this.indexBuffer.append(END_OF_DOCUMENT); 476 mgPasses.processDocument(indexBuffer.toString()); 477 String filename=""; 478 try { 479 filename = "pass"+this.pass+"doc"+this.documentSeqNo+".txt"; 480 System.err.println("trying to write to "+filename); 481 GSFile.writeFile(indexBuffer.toString().getBytes(), filename); 482 } catch (Exception e) { 483 System.err.println("COUldn't write to file, "+filename); 484 } 485 this.indexBuffer.delete(0, this.indexBuffer.length()); 486 487 471 488 472 489 return true; … … 487 504 this.indexBuffer = new StringBuffer(); 488 505 int indexNo = this.pass/2; 489 this.current_index = null;490 506 491 507 this.current_index = (MGIndex) this.indexes.get(indexNo); … … 512 528 this.textStem = this.indexStem; 513 529 } 530 514 531 
mgPasses.setFileName(this.indexStem); 515 532 if (!Misc.isWindows()) { … … 573 590 } catch (Exception e) {} 574 591 575 int exit_value = 0;592 int exit_value = mgPasses.exitValue(); 576 593 System.out.println("Pass " + this.pass + " completed with " + exit_value); 577 594 if (exit_value !=0) {
Note: See TracChangeset for help on using the changeset viewer.