source: trunk/indexers/mg/java/org/greenstone/mg/MGPassesWrapper.java@ 9987

Last change on this file since 9987 was 9987, checked in by kjdon, 19 years ago

added in an exitValue/get_exit_value method to the MGPassesWrapper

  • Property svn:keywords set to Author Date Id Revision
File size: 4.9 KB
Line 
1/*
2 * MGPassesWrapper.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.mg;
20
21
/**
 * Java wrapper class for access to gs3_mg_passes in C.
 *
 * The native side is implemented in MGPassesWrapperImpl.c. The "mgpassjni"
 * library is loaded, and initIDs() is called, when this class is initialised.
 */
public class MGPassesWrapper
{
    static {
        System.loadLibrary("mgpassjni");
        initIDs();
    }

    /* Inverted file levels.
       NOTE(review): presumably the values expected by setInvfLevel(char) -
       confirm against MGPassesWrapperImpl.c. */
    public static final char INVF_LEVEL_1 = '1';
    public static final char INVF_LEVEL_2 = '2';
    public static final char INVF_LEVEL_3 = '3';

    /* Pass identifiers understood by addPass(int). */
    public static final int TEXT_PASS_1 = 0;
    public static final int TEXT_PASS_2 = 1;
    public static final int INDEX_PASS_1 = 2;
    public static final int INDEX_PASS_2 = 3;
    public static final int SPECIAL_PASS = 4;

    /* Stemming methods.
       NOTE(review): presumably the stem_method argument of setStemOptions -
       confirm against MGPassesWrapperImpl.c. */
    public static final int NO_STEM_OR_CASE = 0;
    public static final int CASE_ONLY = 1;
    public static final int STEM_ONLY = 2;
    public static final int STEM_AND_CASE = 3;

    /* Stemmer names.
       NOTE(review): presumably the stemmer_type argument of setStemOptions -
       confirm against MGPassesWrapperImpl.c. */
    public static final String STEMMER_ENGLISH = "english";
    public static final String STEMMER_FRENCH = "french";
    public static final String STEMMER_LOVIN = "lovin";
    public static final String STEMMER_SIMPLE_FRENCH = "simple-french";

    /** Separator (character code 2, i.e. ^B) between the MG documents
        that make up one Greenstone document. */
    private static final char END_OF_DOCUMENT = (char) 2;

    /** Creates a wrapper and initialises the C side through initCSide().
        The boolean returned by initCSide() is not checked here.
     */
    public MGPassesWrapper() {
        initCSide();
    }

    /** initialise the pass through the documents */
    public native boolean init();

    /** Add a pass declaration.
        The pass id is translated into the (type, number) pair handed to
        the native addPass: TEXT_PASS_1 is ('T','1'), TEXT_PASS_2 is
        ('T','2'), INDEX_PASS_1 is ('I','1'), INDEX_PASS_2 is ('I','2')
        and SPECIAL_PASS is ('S','1').
        Any other value declares no pass; a warning is written to STDERR
        so that the mistake does not go unnoticed.

        @param pass one of the *_PASS_* constants of this class
     */
    public void addPass(int pass) {
        switch (pass) {
        case TEXT_PASS_1:
            addPass('T', '1');
            break;
        case TEXT_PASS_2:
            addPass('T', '2');
            break;
        case INDEX_PASS_1:
            addPass('I', '1');
            break;
        case INDEX_PASS_2:
            addPass('I', '2');
            break;
        case SPECIAL_PASS:
            addPass('S', '1');
            break;
        default:
            // Still a no-op for unknown ids (as before), but say so instead
            // of dropping the request silently.
            System.err.println("MGPassesWrapper.addPass: unknown pass id "+pass+", ignored");
            break;
        }
    }

    /** set the base path */
    public native void setBasePath(String basepath);

    /** set the file name */
    public native void setFileName(String filename);

    /** Set the stemmer and the stemming method.
        NOTE(review): presumably stemmer_type is one of the STEMMER_*
        constants and stem_method one of NO_STEM_OR_CASE, CASE_ONLY,
        STEM_ONLY or STEM_AND_CASE - confirm against the native side.
     */
    public native void setStemOptions(String stemmer_type, int stem_method);

    /** Set the inverted file level.
        NOTE(review): presumably one of the INVF_LEVEL_* constants -
        confirm against the native side.
     */
    public native void setInvfLevel(char level);

    /** Specify the size of the document buffer in kilobytes.
        If any document is larger than bufsize, the program
        will abort with an error message.
     */
    public native void setBufferSize(long bufsize);

    /** Maximum amount of memory to use for the index pass-2 file
        inversion in megabytes.
     */
    public native void setInversionMemLimit(int limit);

    /** If true, treat SGML tags as non-words when building the
        inverted file.
     */
    public native void ignoreSGMLTags(boolean ignore);

    /** If mg_passes fails, the document that caused the failure will be
        output to the trace file or STDERR.
     */
    public native void dumpFailedDocument(boolean dump);

    /** Output statistics on the compression performance to a file
        called *.compression.stats. frequency specifies the interval
        (in kilobytes of source text) between outputting each line of
        statistics.
     */
    public native void outputCompStats(int frequency);

    /** activate tracing, a line will be output every tracepos input bytes */
    public native void enableTracing(int tracepos);

    /** Process a Greenstone document, which may consist of many MG
        documents (separated by ^B).
        The text is split on END_OF_DOCUMENT and every part, including
        empty ones, is passed to the native side as UTF-8 bytes. All parts
        are attempted even if an earlier one fails.

        @param docs_text the text of the Greenstone document
        @return true if every part was processed without an exception and
                the native call returned true for each of them, false
                otherwise (previously the result was always true, so
                failures could not be detected by the caller)
        @throws NullPointerException if docs_text is null
     */
    public boolean processDocument(String docs_text) {
        // need -1 in the following to keep empty strings at the end
        String[] docs = docs_text.split(String.valueOf(END_OF_DOCUMENT), -1);
        System.err.println("GS document split into "+docs.length+" mg documents");
        boolean all_ok = true;
        for (int i = 0; i < docs.length; i++) {
            try {
                // NOTE(review): assumes the native side returns true on
                // success - confirm against MGPassesWrapperImpl.c.
                if (!processMGDocument(docs[i].getBytes("UTF-8"))) {
                    all_ok = false;
                }
            } catch (Exception e) {
                // Best effort: report the problem, remember it, and carry
                // on with the remaining MG documents.
                e.printStackTrace();
                all_ok = false;
            }
        }
        return all_ok;
    }

    /** finalise the pass through the documents */
    public native boolean finish();

    /** get the exit value once finished */
    public native int exitValue();

    /** initialises field and method IDs for java side to enable access on C side */
    private static native void initIDs();

    /** initialises any C side stuff */
    private native boolean initCSide();

    /** Native counterpart of addPass(int): declares a pass from its type
        character ('T', 'I' or 'S' as used by addPass(int)) and its number
        character ('1' or '2'). */
    private native void addPass(char pass_type, char pass_num);

    /** process a MG document */
    private native boolean processMGDocument(byte[] text);

}
Note: See TracBrowser for help on using the repository browser.