Changeset 8966
- Timestamp:
- 2005-02-04T12:11:45+13:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGPPIndexer.java
r8927 r8966 1 1 package org.greenstone.gsdl3.gs3build.indexers; 2 3 import org.greenstone.mgpp.MGPPPassesWrapper; 2 4 3 5 import org.greenstone.gsdl3.gs3build.doctypes.DocumentID; 4 6 import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface; 5 7 import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocument; 8 import org.greenstone.gsdl3.gs3build.doctypes.METSDocument; 9 import org.greenstone.gsdl3.util.Misc; 10 import org.greenstone.gsdl3.util.GSXML; 11 import org.greenstone.gsdl3.util.Processing; 12 import org.greenstone.gsdl3.gs3build.xpointer.XPointer; 13 import org.greenstone.gsdl3.gs3build.metadata.*; 6 14 import java.io.InputStream; 7 15 import java.io.OutputStream; 8 16 import java.io.IOException; 17 import java.util.ArrayList; 18 import java.util.List; 19 import java.io.File; 20 import java.util.Iterator; 9 21 10 22 import org.w3c.dom.Element; 23 import org.w3c.dom.Node; 24 import org.w3c.dom.Document; 11 25 12 26 public class MGPPIndexer extends AbstractIndexer … … 14 28 int pass; 15 29 int documentSeqNo; 30 int sectionSeqNo; 16 31 String name; 17 32 boolean firstDocument; 33 File indexDirectory; 34 File textDirectory; 35 String indexStem; 36 String textStem; 37 StringBuffer indexBuffer; 18 38 String outputDirectory; 19 String outputStem; 20 String passExtra; 21 InputStream indexerFeedback; 22 InputStream indexerErrors; 23 OutputStream indexerTextfeed; 24 Process mgpp_passes; 39 //String outputStem; 40 // String passExtra; 41 // InputStream indexerFeedback; 42 // InputStream indexerErrors; 43 // OutputStream indexerTextfeed; 44 // Process mgpp_passes; 45 //String overallName; 46 String currentIndexName; 47 String currentIndexLevel; 48 String currentIndexField; 49 MGPPPassesWrapper mgppPasses; 25 50 51 26 52 static final String documentSeparator = "<Document>"; 27 53 static final String sectionSeparator = "<Section>"; 28 54 55 static final String START_OF_DOCUMENT = "<Document>"; 56 static final String END_OF_DOCUMENT = "</Document>"; 57 static final String START_OF_SECTION = "<Section>"; 58 static final String END_OF_SECTION = "</Section>"; 59 60 29 61 public static final String MGPP_INDEX_TYPE = "mgpp"; 30 62 public static final String INDEX_FILE_STEM = "index"; 31 63 64 65 class MGPPIndex 66 { 67 public String name = null; 68 public String doc_level = null; 69 public ArrayList levels = null; 70 public ArrayList fields = null; 71 boolean error = false;// assume built until we get an error 72 73 public MGPPIndex(String name) { 74 this.name = name; 75 doc_level = "Document"; 76 } 77 78 public void setDocLevel(String doc_level) { 79 this.doc_level = doc_level; 80 } 81 82 public void addLevel(String level) { 83 this.levels.add(level); 84 } 85 86 // change to allow nested fields 87 public void addField(String field) { 88 this.fields.add(field); 89 } 90 91 public boolean hasError() { 92 return this.error; 93 } 94 public void setError(boolean b) { 95 this.error = b; 96 } 97 98 } 99 100 32 101 public MGPPIndexer(String name) 33 102 { … … 47 116 public boolean configure(String label, String value) 48 117 { 49 if (label.equals( "outputDir")) {118 if (label.equals(IndexerManager.outputDir)) { 50 119 this.outputDirectory = value; 51 this.outputStem = value + "/index"; 120 121 // attempt to ensure that the text subdirectory exists 122 this.textDirectory = new File(outputDirectory, "text"); 123 if (!textDirectory.exists()) { 124 if (!textDirectory.mkdir()) { 125 return false; 126 } 127 } 128 else if (!textDirectory.isDirectory()) { 129 return false; 130 } 131 this.textStem = this.textDirectory.getPath() + File.separator + INDEX_FILE_STEM; 132 133 // attempt to ensure that the index subdir exists 134 this.indexDirectory = new File(outputDirectory, "idx"); 135 if (!indexDirectory.exists()) { 136 if (!indexDirectory.mkdir()) { 137 return false; 138 } 139 } 140 else if (!indexDirectory.isDirectory()) { 141 return false; 142 } 143 this.indexStem = this.indexDirectory.getPath() + File.separator + INDEX_FILE_STEM; 144 145 // Sign to the user which mg directory is being used... 146 System.out.println("Output MGPP text directory is " + this.textStem); 147 System.out.println("Output MGPP index directory is " + this.indexStem); 52 148 } 53 149 this.pass = 0; … … 62 158 public boolean addIndex(String name, String level, String field) 63 159 { 64 if (level == "doc_level") {65 passExtra = " -J " + level;66 }67 else {68 passExtra = " -K " + level;69 }160 // if (level == "doc_level") { 161 // passExtra = " -J " + level; 162 // } 163 // else { 164 // passExtra = " -K " + level; 165 // } 70 166 return true; 71 167 } … … 82 178 } 83 179 180 // why do this at the start and not at the end??? 84 181 if (!this.firstDocument) { 85 // Send a '<document>' before the document itself 86 try { 87 this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 88 } 89 catch (IOException ex) { 90 System.out.println("Bad output on end of document" + ex); 91 ex.printStackTrace(); 92 return false; 93 } 94 } 95 96 String docText = document.getDocumentText(); 97 int startSeqNo = this.documentSeqNo; 98 99 byte [] bytes = docText.getBytes(); 100 int pos = 0, end = bytes.length; 101 102 try { 103 while (pos < end) { 104 this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos)); 105 pos = pos + 512; 106 107 try { 108 while (this.indexerFeedback.available() > 0) { 109 byte b[] = new byte[this.indexerFeedback.available()]; 110 System.out.println("Feedback of " + this.indexerFeedback.available()); 111 this.indexerFeedback.read(b); 112 System.out.println(b); 113 } 114 } 115 catch (IOException ex) { 182 // Send a '</Document>' at the end of the doc 183 this.indexBuffer.append(END_OF_DOCUMENT); 184 mgppPasses.processDocument(indexBuffer.toString()); 185 this.indexBuffer.delete(0, this.indexBuffer.length()); 186 } 187 188 String docText = null; 189 190 int startSeqNo = this.sectionSeqNo; 191 this.sectionSeqNo ++; 192 193 Document domDocument = document.getDOMDocument(); 194 if (domDocument != null) { 195 System.err.println("dom doc is not null"); 196 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 197 if (sections != null) { 198 System.err.println("sections are not null"); 199 docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field); 200 // System.out.println(docText); 201 } 202 } 203 if (docText == null) { 204 System.err.println("dom doc or sections was null - asking for doc text"); 205 //if (this.currentIndexField.equals("text")) { 206 //docText = Character.toString(END_OF_DOCUMENT) + document.getDocumentText(); 207 docText = document.getDocumentText(); 208 //} 209 // else { 210 // StringBuffer textBuffer = new StringBuffer(); 211 // //textBuffer.append(END_OF_DOCUMENT); 212 // List values = document.getDocumentMetadataItem("gsdl3", this.currentIndexField); 213 // if (values != null) { 214 // Iterator valueIter = values.iterator(); 215 // while (valueIter.hasNext()) { 216 // String value = valueIter.next().toString(); 217 218 // textBuffer.append(value); 219 // if (valueIter.hasNext()) { 220 // //textBuffer.append(END_OF_SECTION); 221 // // sectionSeqNo ++; 222 // } 223 // } 224 // } 225 // else { 226 // textBuffer.append("No data"); 227 // } 228 // docText = textBuffer.toString(); 229 // } 230 sectionSeqNo ++; 231 } 232 233 //try { 234 // this.indexerTextfeed.write(documentSeparator.getBytes(), 0, documentSeparator.getBytes().length); 235 // } 236 // catch (IOException ex) { 237 // System.out.println("Bad output on end of document" + ex); 238 // ex.printStackTrace(); 239 // return false; 240 // } 241 // } 242 243 this.indexBuffer.append(START_OF_DOCUMENT); 244 //String docText = document.getDocumentText(); 245 this.indexBuffer.append(docText); 246 //int startSeqNo = this.documentSeqNo; 247 248 // byte [] bytes = docText.getBytes(); 249 // int pos = 0, end = bytes.length; 250 251 // try { 252 // while (pos < end) { 253 // this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos)); 254 // pos = pos + 512; 255 256 // try { 257 // while (this.indexerFeedback.available() > 0) { 258 // byte b[] = new byte[this.indexerFeedback.available()]; 259 // System.out.println("Feedback of " + this.indexerFeedback.available()); 260 // this.indexerFeedback.read(b); 261 // System.out.println(b); 262 // } 263 // } 264 // catch (IOException ex) { 116 265 117 }118 119 120 try {121 while (this.indexerErrors.available() > 0) {122 byte b[] = new byte[this.indexerErrors.available()];123 System.out.println("Feedback of " + this.indexerErrors.available());124 this.indexerErrors.read(b);125 System.out.println(new String(b));126 }127 }128 catch (IOException ex){266 // } 267 268 269 // try { 270 // while (this.indexerErrors.available() > 0) { 271 // byte b[] = new byte[this.indexerErrors.available()]; 272 // System.out.println("Feedback of " + this.indexerErrors.available()); 273 // this.indexerErrors.read(b); 274 // System.out.println(new String(b)); 275 // } 276 // } 277 // catch (IOException ex){ 129 278 130 }131 }132 }133 catch (IOException ex) {134 System.out.println("Bad output during document write " + ex + " " + pos + " " + end);135 ex.printStackTrace();136 return false;137 }279 // } 280 // } 281 // } 282 // catch (IOException ex) { 283 // System.out.println("Bad output during document write " + ex + " " + pos + " " + end); 284 // ex.printStackTrace(); 285 // return false; 286 // } 138 287 this.firstDocument = false; 139 288 … … 143 292 this.documentSeqNo += 1; 144 293 145 try {146 while (this.indexerErrors.available() > 0) {147 char c = (char) this.indexerErrors.read();148 System.out.println(c);149 }150 while (this.indexerFeedback.available() > 0) {151 byte b[] = new byte[this.indexerFeedback.available()];152 System.out.println("Feedback of " + this.indexerFeedback.available());153 this.indexerFeedback.read(b);154 }155 }156 catch (IOException ex) {157 158 }294 // try { 295 // while (this.indexerErrors.available() > 0) { 296 // char c = (char) this.indexerErrors.read(); 297 // System.out.println(c); 298 // } 299 // while (this.indexerFeedback.available() > 0) { 300 // byte b[] = new byte[this.indexerFeedback.available()]; 301 // System.out.println("Feedback of " + this.indexerFeedback.available()); 302 // this.indexerFeedback.read(b); 303 // } 304 // } 305 // catch (IOException ex) { 306 307 // } 159 308 return true; 160 309 } … … 169 318 this.documentSeqNo = 1; 170 319 171 try { 172 switch (this.pass) { 173 case 0: 174 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1"); 175 break; 176 177 case 1: 178 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2"); 179 break; 180 181 case 2: 182 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1"); 183 break; 184 185 case 3: 186 Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 187 p.waitFor(); 188 189 mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2"); 190 break; 191 } 192 193 this.indexerFeedback = mgpp_passes.getInputStream(); 194 this.indexerErrors = mgpp_passes.getErrorStream(); 195 this.indexerTextfeed = mgpp_passes.getOutputStream(); 196 } 197 catch (IOException ex) { 198 System.out.println(ex); 199 ex.printStackTrace(); 200 return false; 201 } 202 catch (InterruptedException ex) { 203 System.out.println(ex); 204 ex.printStackTrace(); 205 return false; 206 } 320 this.mgppPasses = new MGPPPassesWrapper(); 321 this.indexBuffer = new StringBuffer(); 322 323 MGPPIndex index = null; // do something with this!! 324 325 // get the parameters for this execution of mg_passes 326 mgppPasses.setFileName((this.pass < 2 ? this.textStem : this.indexStem )); 327 if (!Misc.isWindows()) { 328 mgppPasses.setBasePath("/"); 329 } 330 331 mgppPasses.setDocumentTag("Document"); 332 //mgppPasses.addLevelTag("Section"); 333 334 this.currentIndexLevel = "Document";// index.getLevel(); 335 this.currentIndexField = "text";//index.getField(); 336 this.currentIndexName = "idx"; //index.getName(); 337 338 339 switch (this.pass) { 340 case 0: 341 // -T1 342 mgppPasses.addPass(MGPPPassesWrapper.TEXT_PASS_1); 343 //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem + " -T1"); 344 break; 345 346 case 1: 347 // -T2 348 mgppPasses.addPass(MGPPPassesWrapper.TEXT_PASS_2); 349 //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -T2"); 350 break; 351 352 case 2: 353 // -I1 354 mgppPasses.addPass(MGPPPassesWrapper.INDEX_PASS_1); 355 //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I1"); 356 break; 357 358 case 3: 359 //Process p = Runtime.getRuntime().exec("mgpp_perf_hash_build -f " + this.outputStem); 360 //p.waitFor(); 361 // -I2 362 mgppPasses.addPass(MGPPPassesWrapper.INDEX_PASS_2); 363 //mgpp_passes = Runtime.getRuntime().exec("mgpp_passes " + passExtra + " -f " + this.outputStem +" -I2"); 364 break; 365 } 366 367 //this.indexerFeedback = mgpp_passes.getInputStream(); 368 // this.indexerErrors = mgpp_passes.getErrorStream(); 369 // this.indexerTextfeed = mgpp_passes.getOutputStream(); 370 // } 371 //catch (IOException ex) { 372 // System.out.println(ex); 373 // ex.printStackTrace(); 374 // return false; 375 //}/ 376 //catch (InterruptedException ex) { 377 // System.out.println(ex); 378 // ex.printStackTrace(); 379 // return false; 380 //} 381 mgppPasses.init(); 207 382 System.out.println("Pass " + this.pass); 208 383 return true; … … 216 391 // TODO: end pass 217 392 Process p; 218 393 MGPPIndex index = null; // do something with this!! 219 394 try { 220 this.indexerTextfeed.write((char) 2); 221 this.indexerTextfeed.write(4); 222 while (this.indexerErrors.available() > 0) { 223 char c = (char) this.indexerErrors.read(); 224 System.out.print(c); 225 } 226 while (this.indexerFeedback.available() > 0) { 227 byte b[] = new byte[this.indexerFeedback.available()]; 228 System.out.print("Feedback of " + this.indexerFeedback.available()); 229 this.indexerFeedback.read(b); 230 } 231 232 this.indexerTextfeed.close(); 233 Thread.sleep(1000); 234 this.mgpp_passes.waitFor(); 235 } 236 catch (IOException ex) { 237 System.out.println(ex); 395 this.indexBuffer.append(END_OF_DOCUMENT); 396 mgppPasses.processDocument(indexBuffer.toString()); 397 this.indexBuffer.delete(0, this.indexBuffer.length()); 398 Thread.sleep(1000); // what for?? 238 399 } 239 400 catch (InterruptedException ex) { 240 401 System.out.println(ex); 241 402 } 242 System.out.println("Completed with " + this.mgpp_passes.exitValue()); 243 403 mgppPasses.finish(); 244 404 try { 245 switch (this.pass) { 246 case 0: 247 System.out.println("Compressing dictionary"); 248 p = Runtime.getRuntime().exec("mgpp_compression_dict -f " + this.outputStem + " -S -H -2 -k 5120"); 249 p.waitFor(); 250 System.out.println(p.exitValue()); 251 break; 405 Thread.sleep(1000); 406 } catch (Exception e) {} 407 408 int exit_value = mgppPasses.exitValue(); 409 System.out.println("Pass " + this.pass + " completed with " + exit_value); 410 if (exit_value !=0) { 411 //assume something has gone wrong, don't continue 412 // if (index != null) { 413 // index.setError(true); 414 // return false; 415 // } 416 } 417 418 String osextra = ""; 419 if (!Misc.isWindows()) { 420 osextra = " -d / "; 421 } 422 423 switch (this.pass) { 424 case 0: 425 //System.exit(1); 426 System.out.println("Compressing dictionary"); 427 exit_value = Processing.runProcess("mgpp_compression_dict -f " + this.textStem + " -S -H -2 -k 5120"+ osextra); 428 429 if (exit_value == 0) { 430 System.out.println("Compressed dictionary successfully written"); 431 } else { 432 System.err.println("Error from mg_compression_dict: " + exit_value); 433 index.setError(true); 252 434 253 case 3: 254 p = Runtime.getRuntime().exec("mgpp_weights_build -f " + this.outputStem); 255 p.waitFor(); 256 System.out.println(p.exitValue()); 257 258 p = Runtime.getRuntime().exec("mgpp_invf_dict -f " + this.outputStem); 259 p.waitFor(); 260 System.out.println(p.exitValue()); 261 262 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s1 -f " + this.outputStem + " -d " + this.outputDirectory); 263 p.waitFor(); 264 System.out.println(p.exitValue()); 265 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s2 -f " + this.outputStem + " -d " + this.outputDirectory); 266 p.waitFor(); 267 System.out.println(p.exitValue()); 268 p = Runtime.getRuntime().exec("mgpp_stem_idx -b 4096 -s3 -f " + this.outputStem + " -d " + this.outputDirectory); 269 p.waitFor(); 270 System.out.println(p.exitValue()); 271 break; 272 } 273 } 274 catch (IOException ex) { 275 System.out.println(ex); 276 ex.printStackTrace(); 277 return false; 278 } 279 catch (InterruptedException ex) { 280 System.out.println(ex); 281 ex.printStackTrace(); 282 return false; 283 } 435 return false; 436 } 437 break; 438 439 case 2: 440 System.out.println("Creating perfect hash"); 441 exit_value = Processing.runProcess("mgpp_perf_hash_build -f " + this.indexStem + osextra); 442 if (exit_value ==0) { 443 System.out.println("Perfect hashes completed"); 444 } else { 445 System.err.println("Unable to build the perfect hash"); 446 index.setError(true); 447 return false; 448 } 449 break; 450 451 case 3: 452 System.out.println("Writing weights file"); 453 exit_value = Processing.runProcess("mgpp_weights_build -f " + this.indexStem + osextra); 454 if (exit_value ==0) { 455 System.out.println("Weights file successfully written"); 456 } else { 457 System.err.println("Unable to create weights file"); 458 index.setError(true); 459 return false; 460 } 461 462 System.out.println("Creating inverted dictionary"); 463 exit_value = Processing.runProcess("mgpp_invf_dict -f " + this.indexStem + osextra); 464 if (exit_value ==0) { 465 System.out.println("Inverted dictionary file successfully written"); 466 } else { 467 System.out.println("Unable to create inverted dictionary file"); 468 index.setError(true); 469 return false; 470 } 471 472 System.out.println("Creating Stem indexes"); 473 exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s1 -f " + this.indexStem +osextra); 474 if (exit_value == 0) { 475 System.out.println("Stemmed index 1 successfully written"); 476 } else { 477 System.out.println("Unable to create stemmed index 1"); 478 index.setError(true); 479 return false; 480 } 481 482 exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s2 -f " + this.indexStem + osextra); 483 if (exit_value == 0) { 484 System.out.println("Stemmed index 2 successfully written"); 485 } else { 486 System.out.println("Unable to create stemmed index 2"); 487 index.setError(true); 488 return false; 489 } 490 exit_value = Processing.runProcess("mgpp_stem_idx -b 4096 -s3 -f " + this.indexStem + osextra); 491 if (exit_value == 0) { 492 System.out.println("Stemmed index 3 successfully written"); 493 } else { 494 System.out.println("Unable to create stemmed index 3"); 495 index.setError(true); 496 return false; 497 } 498 499 break; 500 } // switch 501 284 502 return true; 285 503 } … … 300 518 } 301 519 302 public boolean addServiceDescriptions(Element service_rack_list) 303 { 304 System.out.println("adding service description, MGPPIndexer"); 520 public boolean addServiceDescriptions(Element service_rack_list) { 521 Document doc = service_rack_list.getOwnerDocument(); 522 523 // generate the list of indexes 524 Element index_list = doc.createElement(GSXML.INDEX_ELEM+GSXML.LIST_MODIFIER); 525 Element e = doc.createElement(GSXML.INDEX_ELEM); 526 e.setAttribute(GSXML.NAME_ATT, "idx"); 527 index_list.appendChild(e); 528 String def_index = "idx"; 529 530 // boolean found_index = false; 531 // String def_index = ""; // the default index will just be the first one created for now. 532 // for (int i=0; i<this.indexes.size(); i++) { 533 // MGIndex index = (MGIndex)this.indexes.get(i); 534 // if (!index.hasError()) { 535 // Element e = doc.createElement(GSXML.INDEX_ELEM); 536 // e.setAttribute(GSXML.NAME_ATT, index.getName()); 537 // index_list.appendChild(e); 538 // if (found_index == false) { 539 // // this is the first index 540 // found_index = true; 541 // def_index = index.getName(); 542 // } 543 // } 544 // } 545 546 // if (!found_index) { 547 // // no indexes were able to be created, so we can't use them or the text 548 // return false; 549 // } 550 551 Element f = doc.createElement(GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER); 552 553 Element default_index = doc.createElement("defaultIndex"); 554 default_index.setAttribute(GSXML.NAME_ATT, def_index); 555 Element base_index_name = doc.createElement("baseIndexPrefix"); 556 base_index_name.setAttribute(GSXML.NAME_ATT, "index"); //overallName); 557 Element search_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM); 558 Element retrieve_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM); 559 Element default_level = doc.createElement("defaultLevel"); 560 default_index.setAttribute(GSXML.NAME_ATT, "Document"); 561 service_rack_list.appendChild(search_service_elem); 562 service_rack_list.appendChild(retrieve_service_elem); 563 564 search_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGPPSearch"); 565 search_service_elem.appendChild(index_list); 566 search_service_elem.appendChild(default_index); 567 search_service_elem.appendChild(default_level); 568 search_service_elem.appendChild(base_index_name); 569 570 retrieve_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGPPRetrieve"); 571 retrieve_service_elem.appendChild(default_level.cloneNode(true)); 572 retrieve_service_elem.appendChild(base_index_name.cloneNode(true)); 573 305 574 return true; 306 575 } 576 577 578 private Node recurseDOM(DocumentInterface metsDoc, Node node, 579 AbstractStructure structure, StringBuffer textBuffer, 580 StringBuffer extraBuffer, String namespace) 581 //String name, String namespace, String field) 582 { 583 // send out the ctrl-c...if this is 584 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) { 585 // try doing this for all index types 586 if ((this.currentIndexName != null)) { // && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //name.startsWith("s")) { 587 METSDivision division = (METSDivision) structure; 588 589 // get the division metadata block 590 METSDescriptive descriptive; 591 String metadataId = division.getDefaultMetadataReference(); 592 if (metadataId == null) { 593 descriptive = metsDoc.getDocumentMetadata().createDescriptive(division.getLabel()); 594 division.addMetadataReference(descriptive.getID()); 595 } 596 else { 597 // Get the descriptive item... 598 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 599 } 600 601 descriptive.addMetadata("gsdl3", "mgseqno", this.name + "." + Integer.toString(this.sectionSeqNo)); 602 603 metsDoc.setChanged(true); 604 //metsDoc.setModified(true); 605 // System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel()); 606 } // section level 607 608 // append an 'end of section' marker 609 //textBuffer.append(END_OF_SECTION); 610 this.sectionSeqNo ++; 611 612 // for document-level indexes, always append an 'end of document' tag at the 613 // end of the document for each section. Otherwise, each section is followed 614 // by an end of document character. This ensures that all indexes use the 615 // same document numbering... 616 if (this.currentIndexLevel == null || 617 this.currentIndexLevel.equals(IndexerInterface.DOCUMENT_LEVEL)) { 618 extraBuffer.append(END_OF_DOCUMENT); 619 } 620 else { 621 textBuffer.append(END_OF_DOCUMENT); 622 this.documentSeqNo ++; 623 } 624 625 // produce the body here for metadata output of divisions - in the case of 626 // text output, that will happen below... 627 if (!this.currentIndexField.equals("text")) 628 { METSDescriptive descriptive; 629 630 METSDivision division = (METSDivision) structure; 631 632 String metadataId = division.getDefaultMetadataReference(); 633 634 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 635 if (descriptive != null) { 636 List values = descriptive.getMetadata(namespace, this.currentIndexField); 637 638 if (values != null) { 639 Iterator valueIter = values.iterator(); 640 while (valueIter.hasNext()) { 641 String value = valueIter.next().toString(); 642 643 textBuffer.append(value); 644 if (valueIter.hasNext()) { 645 //textBuffer.append(END_OF_SECTION); 646 } 647 } 648 } 649 } 650 } 651 } 652 653 // go through our children as required... 654 Iterator children = structure.getChildIterator(); 655 Node startNode; 656 while (children.hasNext()) { 657 AbstractStructure child = (AbstractStructure) children.next(); 658 659 // get xpointer for child 660 // get start position node 661 if (metsDoc.getDocumentType() == "METS"){ 662 startNode = ((METSDocument) metsDoc).getSectionStartNode((METSDivision) child); 663 } else { 664 startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child); 665 } 666 //Node startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child); 667 668 // while this node isn't the child's start node, produce the HTML node text, if 669 // in text field mode... 670 if (this.currentIndexField.equals("text")) { 671 while (node != startNode) { 672 XPointer.printNode(node, textBuffer, false); 673 674 // print buffer to node 675 node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null)); 676 } 677 } 678 679 // recurse to child 680 node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // name, namespace, field); 681 } // while next child 682 683 // close a document - the actual closing \B will be done by the main 684 // loop, so only a required \C is printed here... 685 if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) { 686 while (node != null) { 687 if (this.currentIndexField.equals("text")) { 688 XPointer.printNode(node, textBuffer, false); 689 } 690 node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null)); 691 } 692 693 //textBuffer.append(END_OF_SECTION); 694 this.sectionSeqNo ++; 695 696 } 697 return node; 698 } 699 700 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace) 701 // String name, String namespace, String field) 702 { StringBuffer extraBuffer = new StringBuffer(); 703 Node node = document.getDocumentElement(); 704 StringBuffer textBuffer = new StringBuffer(); 705 706 this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //name, namespace, field); 707 textBuffer.append(extraBuffer.toString()); 708 return textBuffer.toString(); 709 } 710 307 711 }
Note:
See TracChangeset
for help on using the changeset viewer.