source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java@ 7583

Last change on this file since 7583 was 7583, checked in by kjdon, 20 years ago

minor stuff

  • Property svn:keywords set to Author Date Id Revision
File size: 26.0 KB
Line 
1package org.greenstone.gsdl3.gs3build.indexers;
2
3import java.util.List;
4import java.util.ArrayList;
5import java.util.Iterator;
6
7import java.io.File;
8import java.io.InputStream;
9import java.io.OutputStream;
10import java.io.IOException;
11import java.io.BufferedReader;
12import java.io.InputStreamReader;
13
14import org.w3c.dom.*;
15
16import org.greenstone.mg.*;
17
18import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
19import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
20import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocument;
21import org.greenstone.gsdl3.gs3build.metadata.*;
22import org.greenstone.gsdl3.gs3build.xpointer.XPointer;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.Misc;
25
26public class MGIndexer extends AbstractIndexer
27{
28 int pass;
29 int documentSeqNo;
30 int sectionSeqNo;
31 boolean firstDocument;
32 String outputDirectory;
33// InputStream indexerFeedback;
34// InputStream indexerErrors;
35 //OutputStream indexerTextfeed;
36 StringBuffer indexBuffer;
37 //Process mg_passes;
38 File textDirectory;
39 File indexDirectory;
40 String indexStem;
41 String textStem;
42 List indexes;
43 String overallName;
44
45 String currentIndexName;
46 String currentIndexLevel;
47 String currentIndexField;
48
49 MGPassesWrapper mgPasses;
50
51 static final char END_OF_DOCUMENT = (char) 2;
52 static final char END_OF_SECTION = (char) 3; // actually this is end of para for mg
53 static final char END_OF_STREAM = (char) 4;
54
55 public static final String MG_INDEX_TYPE = "mg";
56 public static final String INDEX_FILE_STEM = "index";
57
58 class MGIndex
59 { String name=null;
60 String level=null;
61 String field=null;
62 boolean error = false;// assume built until we get an error
63
64 public MGIndex(String name, String level, String field)
65 { this.name = name;
66 this.level = level;
67 this.field = field;
68 }
69
70 public MGIndex(String indexLabel)
71 { int colonAt = indexLabel.indexOf(':');
72
73 if (colonAt >= 0)
74 { this.field = indexLabel.substring(colonAt+1);
75 this.level = indexLabel.substring(0, colonAt);
76 createIndexName();
77 }
78 }
79
80 public String getLevel()
81 { return this.level;
82 }
83
84 public String getField()
85 { return this.field;
86 }
87
88 public String getName()
89 {
90 if (this.name==null || this.name.equals("")) {
91 createIndexName();
92 }
93 return this.name;
94 }
95
96 public boolean hasError() {
97 return this.error;
98 }
99 public void setError(boolean b) {
100 this.error = b;
101 }
102
103 private void createIndexName() {
104 StringBuffer new_name = new StringBuffer();
105 new_name.append(Character.toLowerCase((char) this.level.charAt(0)));
106
107 int c, w;
108 w = 0;
109 c = 0;
110 while (c < this.field.length() && w < 2) {
111 char ch = this.field.charAt(c);
112
113 ch = Character.toLowerCase(ch);
114 if (Character.isLetter(ch)) {
115 if (ch != 'a' && ch != 'e' && ch != 'i' &&
116 ch != 'o' && ch != 'u') {
117 new_name.append(ch);
118 w++;
119 }
120 }
121 c ++;
122 }
123 this.name = new_name.toString();
124
125 }
126 } // MGIndex
127
/**
 * Creates an indexer that has no indexes registered yet.
 *
 * @param name the overall name of this indexer, returned by getName()
 */
public MGIndexer(String name)
{
    this.overallName = name;
    this.indexes = new ArrayList();
}
133
134 public String getIndexType()
135 { return MG_INDEX_TYPE;
136 }
137
138 public String getName()
139 { return this.overallName;
140 }
141
142// private String getIndexDirectory(String level, String field)
143// { StringBuffer directory = new StringBuffer();
144// directory.append(Character.toLowerCase((char) level.charAt(0)));
145
146// int c, w;
147// w = 0;
148// c = 0;
149// while (c < field.length() && w < 2) {
150// char ch = field.charAt(c);
151
152// ch = Character.toLowerCase(ch);
153// if (Character.isLetter(ch)) {
154// if (ch != 'a' && ch != 'e' && ch != 'i' &&
155// ch != 'o' && ch != 'u') {
156// directory.append(ch);
157// w++;
158// }
159// }
160// c ++;
161// }
162// return directory.toString();
163// }
164
165 /**
166 * The output directory should be (collection)/building/text/ for
167 * normal Greenstone builds.
168 *
169 * @param <code>String</code> the label to configure
170 * @param <code>String</code> the value...
171 */
172 public boolean configure(String label, String value)
173 {
174 if (label.equals(IndexerManager.outputDir)) {
175 this.outputDirectory = value;
176 this.pass = 0;
177
178 // attempt to ensure that the text subdirectory exists
179 this.textDirectory = new File(outputDirectory, "text");
180 if (!textDirectory.exists()) {
181 if (!textDirectory.mkdir()) {
182 return false;
183 }
184 }
185 else if (!textDirectory.isDirectory()) {
186 return false;
187 }
188 this.textStem = this.textDirectory.getPath() + File.separator + INDEX_FILE_STEM;
189
190 // Sign to the user which mg directory is being used...
191 System.out.println("Output MG directory is " + this.textStem);
192 }
193 else if (label.equals(IndexerInterface.GS2_INDEX_LABEL)) {
194 this.indexes.add(new MGIndex(value));
195 }
196
197 return true;
198 }
199
200 public boolean addIndex(String name, String level, String field)
201 {
202 MGIndex index = new MGIndex(name, level, field);
203 this.indexes.add(index);
204 return true;
205 }
206
207 private Node recurseDOM(DocumentInterface metsDoc, Node node,
208 AbstractStructure structure, StringBuffer textBuffer,
209 StringBuffer extraBuffer, String namespace)
210 //String name, String namespace, String field)
211 {
212 // send out the ctrl-c...if this is
213 if (structure.getStructureType().equals(METSDivision.DIVISION_TYPE)) {
214 // try doing this for all index types
215 if ((this.currentIndexName != null)) { // && this.level != null && this.level.equals(IndexerInterface.SECTION_LEVEL)) { //name.startsWith("s")) {
216 METSDivision division = (METSDivision) structure;
217
218 // get the division metadata block
219 METSDescriptive descriptive;
220 String metadataId = division.getDefaultMetadataReference();
221 if (metadataId == null) {
222 descriptive = metsDoc.getDocumentMetadata().createDescriptive(division.getLabel());
223 division.addMetadataReference(descriptive.getID());
224 }
225 else {
226 // Get the descriptive item...
227 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
228 }
229
230 descriptive.addMetadata("gsdl3", "mgseqno", this.overallName + "." + Integer.toString(this.sectionSeqNo));
231 metsDoc.setModified(true);
232 // System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel());
233 } // section level
234
235 // append an 'end of section' marker
236 //textBuffer.append(END_OF_SECTION);
237 this.sectionSeqNo ++;
238
239 // for document-level indexes, always append an 'end of document' tag at the
240 // end of the document for each section. Otherwise, each section is followed
241 // by an end of document character. This ensures that all indexes use the
242 // same document numbering...
243 if (this.currentIndexLevel == null ||
244 this.currentIndexLevel.equals(IndexerInterface.DOCUMENT_LEVEL)) {
245 extraBuffer.append(END_OF_DOCUMENT);
246 }
247 else {
248 textBuffer.append(END_OF_DOCUMENT);
249 this.documentSeqNo ++;
250 }
251
252 // produce the body here for metadata output of divisions - in the case of
253 // text output, that will happen below...
254 if (!this.currentIndexField.equals("text"))
255 { METSDescriptive descriptive;
256
257 METSDivision division = (METSDivision) structure;
258
259 String metadataId = division.getDefaultMetadataReference();
260
261 descriptive = metsDoc.getDocumentMetadata().getDescriptiveById(metadataId);
262 if (descriptive != null) {
263 List values = descriptive.getMetadata(namespace, this.currentIndexField);
264
265 if (values != null) {
266 Iterator valueIter = values.iterator();
267 while (valueIter.hasNext()) {
268 String value = valueIter.next().toString();
269
270 textBuffer.append(value);
271 if (valueIter.hasNext()) {
272 //textBuffer.append(END_OF_SECTION);
273 }
274 }
275 }
276 }
277 }
278 }
279
280 // go through our children as required...
281 Iterator children = structure.getChildIterator();
282 while (children.hasNext()) {
283 AbstractStructure child = (AbstractStructure) children.next();
284
285 // get xpointer for child
286 // get start position node
287 Node startNode = ((HTMLDocument) metsDoc).getSectionStartNode((METSDivision) child);
288
289 // while this node isn't the child's start node, produce the HTML node text, if
290 // in text field mode...
291 if (this.currentIndexField.equals("text")) {
292 while (node != startNode) {
293 XPointer.printNode(node, textBuffer, false);
294
295 // print buffer to node
296 node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
297 }
298 }
299
300 // recurse to child
301 node = this.recurseDOM(metsDoc, node, child, textBuffer, extraBuffer, namespace); // name, namespace, field);
302 }
303
304 // close a document - the actual closing \B will be done by the main
305 // loop, so only a required \C is printed here...
306 if (structure.getStructureType().equals(METSStructure.STRUCTURE_TYPE)) {
307 while (node != null) {
308 if (this.currentIndexField.equals("text")) {
309 XPointer.printNode(node, textBuffer, false);
310 }
311 node = XPointer.getNextNode(node, (this.currentIndexField.equals("text") ? textBuffer : null));
312 }
313
314 //textBuffer.append(END_OF_SECTION);
315 this.sectionSeqNo ++;
316
317 }
318 return node;
319 }
320
321 private String prepareDOM(DocumentInterface metsDoc, Document document, METSStructure structure, String namespace)
322 // String name, String namespace, String field)
323 { StringBuffer extraBuffer = new StringBuffer();
324 Node node = document.getDocumentElement();
325 StringBuffer textBuffer = new StringBuffer();
326
327 this.recurseDOM(metsDoc, node, structure, textBuffer, extraBuffer, namespace); //name, namespace, field);
328 textBuffer.append(extraBuffer.toString());
329 return textBuffer.toString();
330 }
331
332 /**
333 * Index a single document; the document interface can be used to extract individual
334 * metadata items etc. as required or desired and index those instead or as well as
335 * the body text of the document.
336 */
337 public boolean indexDocument(DocumentID docID, DocumentInterface document)
338 {
339 if (this.pass == 0) {
340 document.removeAllMetadata("gsdl3", "mgseqno");
341 }
342
343 if (!this.firstDocument) {
344 // Send a 'CTRL-B' before the document itself
345 // try {
346 //this.indexerTextfeed.write(END_OF_DOCUMENT);
347 this.indexBuffer.append(END_OF_DOCUMENT);
348 mgPasses.processDocument(indexBuffer.toString());
349 this.indexBuffer.delete(0, this.indexBuffer.length());
350
351 }
352 // }
353 // catch (IOException ex)
354 // { System.out.println("Bad output on end of document" + ex);
355 // ex.printStackTrace();
356 // return false;
357 // }
358
359
360 String docText = null;
361
362 int startSeqNo = this.sectionSeqNo;
363 this.sectionSeqNo ++;
364
365 Document domDocument = document.getDOMDocument();
366 if (domDocument != null) {
367 System.err.println("dom doc is not null");
368 METSStructure sections = document.getDocumentStructure().getStructure("Section");
369 if (sections != null) {
370 System.err.println("sections are not null");
371 docText = this.prepareDOM(document, domDocument, sections, "gsdl3"); //this.name, "gsdl3", this.field);
372 // System.out.println(docText);
373 }
374 }
375 if (docText == null) {
376 System.err.println("doc text is null");
377 if (this.currentIndexField.equals("text")) {
378 docText = Character.toString(END_OF_DOCUMENT) /*+ Character.toString(END_OF_SECTION)*/ + document.getDocumentText();
379 System.err.println("prepending EOD to doctext");
380
381 }
382 else {
383 StringBuffer textBuffer = new StringBuffer();
384 textBuffer.append(END_OF_DOCUMENT);
385 System.err.println("* appending EOD to text");
386
387 //textBuffer.append(END_OF_SECTION);
388 List values = document.getDocumentMetadataItem("gsdl3", this.currentIndexField);
389 if (values != null) {
390 Iterator valueIter = values.iterator();
391 while (valueIter.hasNext()) {
392 String value = valueIter.next().toString();
393
394 textBuffer.append(value);
395 if (valueIter.hasNext()) {
396 //textBuffer.append(END_OF_SECTION);
397 // sectionSeqNo ++;
398 }
399 }
400 }
401 else {
402 textBuffer.append("No data");
403 }
404 docText = textBuffer.toString();
405 }
406 sectionSeqNo ++;
407 }
408
409
410 this.indexBuffer.append(docText);
411 //byte [] bytes = docText.getBytes();
412 //int pos = 0, end = bytes.length;
413
414 /*
415 try {
416 while (pos < end) {
417 //this.indexerTextfeed.write(bytes, pos, (end - pos > 512 ? 512 : end - pos));
418 this.indexBuffer.append((char [])bytes, pos, (end - pos > 512 ? 512 : end - pos));
419 pos = pos + 512;
420
421 try {
422 while (this.indexerFeedback.available() > 0)
423 { byte b[] = new byte[this.indexerFeedback.available()];
424 System.out.println("Feedback of " + this.indexerFeedback.available());
425 this.indexerFeedback.read(b);
426 System.out.println(b);
427 }
428 }
429 catch (IOException ex)
430 { System.out.println(ex);
431 }
432
433
434 try {
435 while (this.indexerErrors.available() > 0)
436 { byte b[] = new byte[this.indexerErrors.available()];
437 System.out.println("Feedback of " + this.indexerErrors.available());
438 this.indexerErrors.read(b);
439 System.out.println(new String(b));
440 }
441 }
442 catch (IOException ex)
443 { System.out.println(ex);
444 }
445 }
446 }
447 catch (IOException ex)
448 { System.out.println("Bad output during document write " + ex + " " + pos + " " + end);
449 ex.printStackTrace();
450 return false;
451 }
452 */
453 // remember that we're not on the first document,
454 this.firstDocument = false;
455 // assign the sequence number on the first pass only, and increment the sequence number.
456 if (this.pass == 0) {
457 //document.addDocumentMetadata("gsdl3", "mgseqno", "dtx."+Integer.toString(startSeqNo));
458 document.addDocumentMetadata("gsdl3", "mgseqno", this.overallName+"."+Integer.toString(startSeqNo));
459 //System.out.println("Assigning " + startSeqNo + " to " + document.getID());
460 }
461 this.documentSeqNo += 1;
462
463 // try {
464// while (this.indexerErrors.available() > 0)
465// { char c = (char) this.indexerErrors.read();
466// System.out.println(c);
467// }
468// while (this.indexerFeedback.available() > 0)
469// { byte b[] = new byte[this.indexerFeedback.available()];
470// System.out.println("Feedback of " + this.indexerFeedback.available());
471// this.indexerFeedback.read(b);
472// }
473// }
474// catch (IOException ex)
475// {
476// }
477 return true;
478 }
479
480 /**
481 * Initialise the pass: open required files, check status
482 */
483 public boolean startPass(int passNumber)
484 {
485
486 this.pass = passNumber;
487 this.firstDocument = true;
488 this.documentSeqNo = 1;
489 this.sectionSeqNo = 1;
490
491 this.mgPasses = new MGPassesWrapper();
492 this.indexBuffer = new StringBuffer();
493 int indexNo = (this.pass - 2) / 2;
494 MGIndex index = null;
495 if (this.pass >= 2) {
496 index = (MGIndex) this.indexes.get(indexNo);
497 if (index.hasError()) {
498 // an error has already occurred for this index, don't continue
499 System.out.println("pass "+this.pass+": aborted due to errors in the previous pass");
500 return false;
501 }
502 // attempt to ensure that the text subdirectory exists
503 //this.indexDirectory = new File(outputDirectory, this.getIndexDirectory(index.getLevel(), index.getField()));
504 this.indexDirectory = new File(outputDirectory, index.getName());
505 if (!indexDirectory.exists()) {
506 if (!indexDirectory.mkdir()) {
507 return false;
508 }
509 }
510 else if (!indexDirectory.isDirectory()) {
511 return false;
512 }
513
514 this.currentIndexLevel = index.getLevel();
515 this.currentIndexField = index.getField();
516 this.currentIndexName = index.getName();
517
518 if (this.currentIndexLevel == null || this.currentIndexField == null ) {
519 System.out.println("invalid index - level or field was null");
520 return false;
521 }
522 //if (this.currentIndexName == null || this.currentIndexName.length() == 0) {
523 // this.currentIndexName = getIndexDirectory(index.getLevel(), index.getField());
524 // }
525 this.indexStem = this.indexDirectory.getPath() + File.separatorChar + INDEX_FILE_STEM; // TODO: modify for index
526 if (this.pass % 2 == 1) {
527 this.currentIndexName = null; // why???
528 }
529 }
530 else {
531
532 this.currentIndexField = "text";
533 this.currentIndexLevel = "section";
534 this.currentIndexName = null;
535 }
536
537 // get the parameters for this execution of mg_passes
538 //String pathParams = "-f index -d " + (this.pass < 2 ? this.textDirectory.toString() : this.indexDirectory.toString());
539 mgPasses.setFileName((this.pass < 2 ? this.textDirectory.toString() : this.indexDirectory.toString())+File.separator+ "index");
540 if (!Misc.isWindows()) {
541 mgPasses.setBasePath("/");
542 }
543 int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
544
545 mgPasses.setBufferSize(100000);
546 // try {
547 // TODO add the other options to mg passes
548 switch (mgPass) {
549 case 0:
550 //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams + " -b 100000 -T1");
551 mgPasses.addPass(MGPassesWrapper.TEXT_PASS_1);
552
553
554 break;
555
556 case 1:
557 //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -T2");
558 mgPasses.addPass(MGPassesWrapper.TEXT_PASS_2);
559 break;
560
561 case 2:
562 //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams + " -b 100000 -2 -m 32 -s 0 -G -t 10 -N1");
563 mgPasses.addPass(MGPassesWrapper.INDEX_PASS_1);
564 mgPasses.setInvfLevel(MGPassesWrapper.INVF_LEVEL_2);
565 mgPasses.setStemOptions(MGPassesWrapper.STEMMER_ENGLISH, MGPassesWrapper.NO_STEM_OR_CASE);
566 mgPasses.setInversionMemLimit(32);
567 mgPasses.ignoreSGMLTags(true);
568 break;
569
570 case 3:
571 //mg_passes = Runtime.getRuntime().exec("mg_passes " + pathParams +" -b 100000 -2 -c 3 -G -t 10 -N2");
572 mgPasses.addPass(MGPassesWrapper.INDEX_PASS_2);
573 mgPasses.setInvfLevel(MGPassesWrapper.INVF_LEVEL_2);
574 mgPasses.ignoreSGMLTags(true);
575 break;
576 }
577
578 mgPasses.init();
579 // this.indexerFeedback = mg_passes.getInputStream();
580 //this.indexerErrors = mg_passes.getErrorStream();
581 //this.indexerTextfeed = mg_passes.getOutputStream();
582 // }
583 // catch (IOException ex)
584 // { System.out.println(ex);
585 // ex.printStackTrace();
586 // index.setError(true);
587 // return false;
588 // }
589 // catch (InterruptedException ex)
590// { System.out.println(ex);
591// ex.printStackTrace();
592// index.setError(true);
593// return false;
594// }
595 System.out.println("Pass " + this.pass);
596 return true;
597 }
598
599 public void printProcessOutput(Process p)
600 throws IOException {
601 BufferedReader error_stream = new BufferedReader(new InputStreamReader( p.getErrorStream(), "UTF-8" ));
602 BufferedReader output_stream = new BufferedReader(new InputStreamReader( p.getInputStream(), "UTF-8" ));
603 while (output_stream.ready()) {
604 System.err.println("out> "+output_stream.readLine());
605 }
606 while (error_stream.ready()) {
607 System.err.println("err> "+error_stream.readLine());
608 }
609
610 }
611 /**
612 * Complete a pass - reset file counters, close files, etc.
613 */
614 public boolean endPass(int passNumber)
615 { Process p;
616
617 int indexNo = (passNumber - 2) / 2;
618 MGIndex index = null;
619 if (passNumber >= 2) {
620 index = (MGIndex) this.indexes.get(indexNo);
621 }
622 try {
623 //this.indexerTextfeed.write(END_OF_DOCUMENT);
624 //this.indexerTextfeed.write(END_OF_STREAM);
625 this.indexBuffer.append(END_OF_DOCUMENT);
626 mgPasses.processDocument(indexBuffer.toString());
627 this.indexBuffer.delete(0, this.indexBuffer.length());
628// while (this.indexerErrors.available() > 0)
629// { char c = (char) this.indexerErrors.read();
630// System.out.print(c);
631// }
632// while (this.indexerFeedback.available() > 0)
633// { byte b[] = new byte[this.indexerFeedback.available()];
634// System.out.print("Feedback of " + this.indexerFeedback.available());
635// this.indexerFeedback.read(b);
636// }
637
638 //this.indexerTextfeed.close();
639 Thread.sleep(1000);
640 //this.mg_passes.waitFor();
641 }
642// catch (IOException ex)
643// { System.out.println(ex);
644// }
645 catch (InterruptedException ex)
646 { System.out.println(ex);
647 }
648 // int exitValue = this.mg_passes.exitValue();
649 mgPasses.finish();
650 try {
651 Thread.sleep(1000);
652 } catch (Exception e) {}
653 int exitValue = 0;
654 System.out.println("Pass " + this.pass + " completed with " + exitValue);
655 if (exitValue !=0) {
656 //assume something has gone wrong, don't continue
657 if (index != null) {
658 index.setError(true);
659 return false;
660 }
661 }
662 int mgPass = this.pass < 2 ? this.pass : ((this.pass % 2) + 2);
663 String osextra = "";
664 if (!Misc.isWindows()) {
665 osextra = " -d / ";
666 }
667 try {
668 switch (mgPass)
669 {
670 case 0:
671 System.out.println("Compressing dictionary");
672 p = Runtime.getRuntime().exec("mg_compression_dict -f " + this.textDirectory.toString()+File.separator+"index" + osextra + " -S -H -2 -k 5120");
673 p.waitFor();
674 printProcessOutput(p);
675 if (p.exitValue() != 0) {
676 System.out.println("Error from mg_compression_dict: " + p.exitValue());
677 index.setError(true);
678
679 return false;
680 }
681 else {
682 System.out.println("Compressed dictionary successfully written");
683 }
684 break;
685
686 case 2:
687 System.out.println("Creating perfect hash");
688 p = Runtime.getRuntime().exec("mg_perf_hash_build -f " + this.indexDirectory.toString()+File.separator+ "index"+osextra);
689 p.waitFor();
690 printProcessOutput(p);
691 if (p.exitValue() == 0) {
692 System.out.println("Perfect hashes completed");
693 } else {
694 System.out.println("Unable to build the perfect hash");
695 index.setError(true);
696 return false;
697 }
698 break;
699
700 case 3:
701 System.out.println("Writing weights file");
702 p = Runtime.getRuntime().exec("mg_weights_build -f " + this.indexStem + " -t " + this.textStem + osextra);
703 p.waitFor();
704 printProcessOutput(p);
705 if (p.exitValue() == 0) {
706 System.out.println("Weights file successfully written");
707 }
708 else {
709 System.out.println("Unable to create weights file");
710 index.setError(true);
711 return false;
712
713 }
714
715 p = Runtime.getRuntime().exec("mg_invf_dict -f " + this.indexDirectory.toString()+File.separator+"index" + osextra);
716 p.waitFor();
717 printProcessOutput(p);
718 if (p.exitValue() == 0) {
719 System.out.println("Inverted dictionary file successfully written");
720 }
721 else {
722 System.out.println("Unable to create inverted dictionary file");
723 index.setError(true);
724 return false;
725
726 }
727
728 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s1 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
729 p.waitFor();
730 printProcessOutput(p);
731 if (p.exitValue() == 0) {
732 System.out.println("Stemmed index 1 successfully written");
733 }
734 else {
735 System.out.println("Unable to create stemmed index 1");
736 index.setError(true);
737 return false;
738
739 }
740
741 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s2 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
742 p.waitFor();
743 printProcessOutput(p);
744 if (p.exitValue() == 0) {
745 System.out.println("Stemmed index 2 successfully written");
746 }
747 else {
748 System.out.println("Unable to create stemmed index 2");
749 index.setError(true);
750 return false;
751 }
752
753 p = Runtime.getRuntime().exec("mg_stem_idx -b 4096 -s3 -f " + this.indexDirectory.toString()+File.separator+"index"+osextra);
754 p.waitFor();
755 printProcessOutput(p);
756 if (p.exitValue() == 0) {
757 System.out.println("Stemmed index 3 successfully written");
758 }
759 else {
760 System.out.println("Unable to create stemmed index 3");
761 index.setError(true);
762 return false;
763 }
764 break;
765 }
766 }
767 catch (IOException ex)
768 { System.out.println(ex);
769 ex.printStackTrace();
770 index.setError(true);
771 return false;
772 }
773 catch (InterruptedException ex)
774 { System.out.println(ex);
775 ex.printStackTrace();
776 index.setError(true);
777 return false;
778 }
779 mgPasses = null;
780 return true;
781 }
782
783 /**
784 * Do any tidying up
785 */
786 public void tidyup()
787 {
788 }
789
790 /**
791 * Return the number of passes required for this index.
792 */
793 public int getNumberOfPasses()
794 { return 2 + this.indexes.size() * 2;
795 }
796
797 public boolean addServiceDescriptions(org.w3c.dom.Element service_rack_list) {
798 System.out.println("adding service description, MGIndexer");
799 Document doc = service_rack_list.getOwnerDocument();
800
801 // generate the list of indexes
802 Element index_list = doc.createElement(GSXML.INDEX_ELEM+GSXML.LIST_MODIFIER);
803 boolean found_index = false;
804 String def_index = ""; // the default index will just be the first one created for now.
805 for (int i=0; i<this.indexes.size(); i++) {
806 MGIndex index = (MGIndex)this.indexes.get(i);
807 if (!index.hasError()) {
808 Element e = doc.createElement(GSXML.INDEX_ELEM);
809 e.setAttribute(GSXML.NAME_ATT, index.getName());
810 index_list.appendChild(e);
811 if (found_index == false) {
812 // this is the first index
813 found_index = true;
814 def_index = index.getName();
815 }
816 }
817 }
818
819 if (!found_index) {
820 // no indexes were able to be created, so we can't use them or the text
821 return false;
822 }
823 Element default_index = doc.createElement("defaultIndex");
824 default_index.setAttribute(GSXML.NAME_ATT, def_index);
825 Element base_index_name = doc.createElement("baseIndexPrefix");
826 base_index_name.setAttribute(GSXML.NAME_ATT, overallName);
827 Element search_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
828 Element retrieve_service_elem = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
829 service_rack_list.appendChild(search_service_elem);
830 service_rack_list.appendChild(retrieve_service_elem);
831
832 search_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGSearch");
833 search_service_elem.appendChild(index_list);
834 search_service_elem.appendChild(default_index);
835 search_service_elem.appendChild(base_index_name);
836
837 retrieve_service_elem.setAttribute(GSXML.NAME_ATT, "GS3MGRetrieve");
838 retrieve_service_elem.appendChild(default_index.cloneNode(true));
839 retrieve_service_elem.appendChild(base_index_name.cloneNode(true));
840
841 return true;
842 }
843
844}
845
Note: See TracBrowser for help on using the repository browser.