Changeset 6376
- Timestamp:
- 2004-01-09T12:51:47+13:00 (20 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/IndexerManager.java
r6355 r6376 53 53 if (document.isIndexed()) { 54 54 if (!this.indexers[i].indexDocument(document.getID(), document)) { 55 System.out.println("Ending document ");55 System.out.println("Ending document " + document.getID()); 56 56 } 57 57 58 58 // note any changes made to this document... 59 if (document.isModified() || true) {59 if (document.isModified()) { 60 60 this.documents.modifiedDocument(document); 61 // System.out.println("Writing document "+document.getID()); 61 62 } 63 /** 64 if (p == 0) { 65 System.out.println("Writing document "+document.getID()); 66 } 67 */ 62 68 } 63 69 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
r6349 r6376 34 34 String textStem; 35 35 List indexes; 36 String indexName; 36 37 String level; 37 38 String field; 39 40 static final char END_OF_DOCUMENT = (char) 2; 41 static final char END_OF_SECTION = (char) 3; 42 static final char END_OF_STREAM = (char) 4; 38 43 39 44 class MGIndex … … 134 139 135 140 private Node recurseDOM(DocumentInterface metsDoc, Node node, 136 AbstractStructure structure, StringBuffer buffer, 141 AbstractStructure structure, StringBuffer textBuffer, 142 StringBuffer extraBuffer, String indexName, 137 143 String namespace, String field) 138 144 { 139 145 // send out the ctrl-c...if this is 140 146 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) { 141 if ( this.pass == 0) {147 if ((indexName != null) && indexName.startsWith("s")) { 142 148 METSDivision division = (METSDivision) structure; 143 149 … … 154 160 } 155 161 156 descriptive.addMetadata("gsdl3", "mgseqno", Integer.toString(this.sectionSeqNo)); 157 } 158 159 buffer.append((char) 3); 160 if (this.level != null && 161 this.level.equals(IndexerInterface.SECTION_LEVEL)) { 162 buffer.append((char) 2); 162 descriptive.addMetadata("gsdl3", "mgseqno", indexName + "." + Integer.toString(this.documentSeqNo)); 163 metsDoc.setModified(true); 164 // System.out.println("Assigning " + this.documentSeqNo + " to " + metsDoc.getID() + " " + division.getLabel()); 165 } 166 167 // append an 'end of section' marker 168 textBuffer.append(END_OF_SECTION); 169 170 // for document-level indexes, always append an 'end of document' tag at the 171 // end of the document for each section. Otherwise, each section is followed 172 // by an end of document character. This ensures that all indexes use the 173 // same document numbering... 174 if (this.level == null || 175 this.level.equals(IndexerInterface.DOCUMENT_LEVEL)) { 176 // extraBuffer.append(END_OF_DOCUMENT); 177 } 178 else { 179 textBuffer.append(END_OF_DOCUMENT); 180 this.documentSeqNo ++; 163 181 } 164 182 this.sectionSeqNo ++; 165 } 166 167 // go through our children if required... 183 184 // produce the body here for metadata output of divisions - in the case of 185 // text output, that will happen below... 186 if (!this.field.equals("text")) 187 { METSDescriptive descriptive; 188 189 METSDivision division = (METSDivision) structure; 190 191 String metadataId = division.getDefaultMetadataReference(); 192 193 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 194 if (descriptive != null) { 195 List values = descriptive.getMetadata(namespace, field); 196 197 if (values != null) { 198 Iterator valueIter = values.iterator(); 199 while (valueIter.hasNext()) { 200 String value = valueIter.next().toString(); 201 202 textBuffer.append(value); 203 if (valueIter.hasNext()) { 204 textBuffer.append(END_OF_SECTION); 205 } 206 } 207 } 208 } 209 } 210 } 211 212 // go through our children as required... 168 213 Iterator children = structure.getChildIterator(); 169 214 while (children.hasNext()) { … … 174 219 Node startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child); 175 220 176 // while this node isn't the child's start node, produce the node text 221 // while this node isn't the child's start node, produce the HTML node text, if 222 // in text field mode... 177 223 if (field.equals("text")) { 178 224 while (node != startNode) { 179 XPointer.printNode(node, buffer, false);225 XPointer.printNode(node, textBuffer, false); 180 226 181 227 // print buffer to node 182 node = XPointer.getNextNode(node, (field.equals("text") ? buffer : null));228 node = XPointer.getNextNode(node, (field.equals("text") ? textBuffer : null)); 183 229 } 184 230 } 185 231 186 232 // recurse to child 187 this.recurseDOM(metsDoc, node, child, buffer, namespace, field);233 this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, indexName, namespace, field); 188 234 } 189 235 … … 193 239 while (node != null) { 194 240 if (field.equals("text")) { 195 XPointer.printNode(node, buffer, false); 196 } 197 else { 198 METSDescriptive descriptive; 199 200 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) { 201 METSDivision division = (METSDivision) structure; 202 203 String metadataId = division.getDefaultMetadataReference(); 204 205 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId); 206 if (descriptive != null) { 207 List values = descriptive.getMetadata(namespace, field); 208 209 Iterator valueIter = values.iterator(); 210 while (valueIter.hasNext()) { 211 String value = valueIter.next().toString(); 212 213 buffer.append(value); 214 if (valueIter.hasNext()) { 215 buffer.append((char) 3); 216 } 217 } 218 } 219 } 220 } 221 node = XPointer.getNextNode(node, (field.equals("text") ? buffer : null)); 222 } 223 buffer.append((char) 3); 241 XPointer.printNode(node, textBuffer, false); 242 } 243 node = XPointer.getNextNode(node, (field.equals("text") ? textBuffer : null)); 244 } 245 /* 246 textBuffer.append(END_OF_SECTION); 224 247 this.sectionSeqNo ++; 248 */ 225 249 } 226 250 return node; 227 251 } 228 252 229 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace, String field) 230 { Node node = document.getDocumentElement(); 253 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, 254 String indexName, String namespace, String field) 255 { StringBuffer extraBuffer = new StringBuffer(); 256 Node node = document.getDocumentElement(); 231 257 StringBuffer textBuffer = new StringBuffer(); 232 233 this.recurseDOM(metsDoc, node, structure, textBuffer, namespace, field); 258 259 this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, indexName, namespace, field); 260 textBuffer.append(extraBuffer.toString()); 234 261 return textBuffer.toString(); 235 262 } … … 255 282 String docText = null; 256 283 284 int startSeqNo = this.sectionSeqNo; 285 257 286 Document domDocument = document.getDOMDocument(); 258 if (domDocument != null) { 287 if (domDocument != null) { 259 288 METSStructure sections = document.getDocumentStructure().getStructure("Section"); 260 289 if (sections != null) { 261 docText = this.prepareDOM(document, domDocument, sections, "gsdl3", this.field);290 docText = this.prepareDOM(document, domDocument, sections, this.indexName, "gsdl3", this.field); 262 291 // System.out.println(docText); 263 292 } 264 293 } 265 294 if (docText == null) { 266 docText = document.getDocumentText(); 295 if (this.field.equals("text")) { 296 docText = Character.toString(END_OF_DOCUMENT) + Character.toString(END_OF_SECTION) + 297 document.getDocumentText(); 298 } 299 else { 300 StringBuffer textBuffer = new StringBuffer(); 301 textBuffer.append(END_OF_DOCUMENT); 302 textBuffer.append(END_OF_SECTION); 303 List values = document.getDocumentMetadataItem("gsdl3", this.field); 304 if (values != null) { 305 Iterator valueIter = values.iterator(); 306 while (valueIter.hasNext()) { 307 String value = valueIter.next().toString(); 308 309 textBuffer.append(value); 310 if (valueIter.hasNext()) { 311 textBuffer.append(END_OF_SECTION); 312 sectionSeqNo ++; 313 } 314 } 315 } 316 else { 317 textBuffer.append("No data"); 318 } 319 docText = textBuffer.toString(); 320 } 321 sectionSeqNo ++; 267 322 } 268 323 … … 284 339 } 285 340 catch (IOException ex) 286 { 341 { System.out.println(ex); 287 342 } 288 343 … … 297 352 } 298 353 catch (IOException ex) 299 { 354 { System.out.println(ex); 300 355 } 301 356 } … … 311 366 this.firstDocument = false; 312 367 if (this.pass == 0) { 313 document.addDocumentMetadata("gsdl3", "mgseqno", Integer.toString(this.documentSeqNo)); 368 document.addDocumentMetadata("gsdl3", "mgseqno", "dtx."+Integer.toString(this.documentSeqNo)); 369 // System.out.println("Assigning " + startSeqNo + " to " + document.getID()); 314 370 } 315 371 this.documentSeqNo += 1; 316 372 317 373 try { 318 374 while (this.indexerErrors.available() > 0) … … 343 399 344 400 int indexNo = (this.pass - 2) / 2; 345 if ( indexNo >= 0) {401 if (this.pass >= 2) { 346 402 MGIndex index = (MGIndex) this.indexes.get(indexNo); 347 403 … … 357 413 } 358 414 359 this.indexStem = this.outputDirectory + File.separatorChar +360 this.getIndexDirectory(index.getLevel(), index.getField()) +361 File.separatorChar + "index"; // TODO: modify for index362 415 this.level = index.getLevel(); 363 416 this.field = index.getField(); 417 this.indexName = this.getIndexDirectory(index.getLevel(), index.getField()); 418 this.indexStem = this.outputDirectory + File.separatorChar + 419 this.indexName + File.separatorChar + "index"; // TODO: modify for index 420 if (this.pass % 2 == 1) { 421 this.indexName = null; 422 } 364 423 } 365 424 else { 366 425 this.field = "text"; 367 } 426 this.level = "document"; 427 this.indexName = null; 428 } 429 System.out.println("level is " + this.level); 430 System.out.println("field is " + this.field); 431 System.out.println("index name is " + this.indexName); 368 432 369 433 // get the parameters for this execution of mg_passes … … 422 486 423 487 try { 424 this.indexerTextfeed.write( (char) 2);425 this.indexerTextfeed.write( 4);488 this.indexerTextfeed.write(END_OF_DOCUMENT); 489 this.indexerTextfeed.write(END_OF_STREAM); 426 490 while (this.indexerErrors.available() > 0) 427 491 { char c = (char) this.indexerErrors.read(); … … 444 508 { System.out.println(ex); 445 509 } 446 System.out.println(" Completed with " + this.mg_passes.exitValue());510 System.out.println("Pass " + this.pass + " completed with " + this.mg_passes.exitValue()); 447 511 448 512 int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2); … … 458 522 System.out.println("Error from mg_compression_dict: " + p.exitValue()); 459 523 } 524 else { 525 System.out.println("Compressed dictionary successfully written"); 526 } 460 527 break; 461 528 … … 468 535 } 469 536 else { 470 System.out.println("Unable to create weights file ");537 System.out.println("Unable to create weights file " + "mg_weights_build -f " + this.indexStem + " -t " + this.textStem + " -d /"); 471 538 } 472 539
Note:
See TracChangeset
for help on using the changeset viewer.