source: main/trunk/greenstone2/common-src/indexers/mg/jni/MGRetrieveWrapperImpl.c@ 25244

Last change on this file since 25244 was 25244, checked in by ak19, 12 years ago

GS3 tomcat server crashes because java crashes owing to some error in the JNI code. The error may be related to pointers having been stored as int rather than long, an issue that's become noticeable on 64 bit linux machines. Changes have been made in the JNI code where these pointers that are transferred between Java and C++ code are stored (GetIntField and SetIntField to GetLongField and SetLongField, as well as declaration of data_ptr as jlong not jint). Committing code first without debug statements so the commits can easily be done separately.

File size: 9.8 KB
Line 
1/*
2 * MGRetrieveWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"

#include <stdint.h>

#include <jni.h>
#include "org_greenstone_mg_MGRetrieveWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30#define MAX_INDEXES_CACHED 3
31
32/* copied from mgquery, needed to convert paragraph numbers to document numbers
33 for greenstone */
34#if defined(PARADOCNUM) || defined(NZDL)
35static int GetDocNumFromParaNum(query_data *qd, int paranum) {
36 int Documents = qd->td->cth.num_of_docs;
37 int *Paragraph = qd->paragraph;
38 int low = 1, high = Documents;
39 int mid = (low+high)/2;
40
41 while ((mid = (low+high)/2) >=1 && mid <= Documents)
42 {
43 if (paranum > Paragraph[mid])
44 low = mid+1;
45 else if (paranum <= Paragraph[mid-1])
46 high = mid-1;
47 else
48 return mid;
49 }
50 FatalError(1, "Bad paragraph number.\n");
51 return 0;
52}
53#endif
54
55
56/*********************************************
57 initialisation stuff
58 *********************************************/
59
60/* cached ids for java stuff */
61jfieldID FID_mg_data = NULL; /* MGWrapperData */
62jfieldID FID_query_result = NULL; /* MGQueryResult */
63jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
64jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
65jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
66jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
67jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
68
69
70/*******************************************
71 Index caching
72 *******************************************/
73
74query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
75
76
77/* Get the index data necessary to perform a query or document retrieval */
78query_data*
79loadIndexData(char* base_dir, char* index_path, char* text_path)
80{
81 char* index_path_name;
82 char* text_path_name;
83 query_data* qd;
84 int i = 0;
85
86 /* Form the path name of the desired indexes */
87 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
88 assert(index_path_name != NULL);
89 strcpy(index_path_name, base_dir);
90 strcat(index_path_name, index_path);
91 printf("Index pathname: %s\n", index_path_name);
92
93 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
94 assert(text_path_name != NULL);
95 strcpy(text_path_name, base_dir);
96 strcat(text_path_name, text_path);
97 printf("Text pathname: %s\n", text_path_name);
98
99 /* Search through the cached indexes for the desired one */
100 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
101 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
102 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
103
104 /* Check if the index has already been loaded */
105 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
106 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
107 /* Index has already been loaded and cached, so return it */
108 printf("Found index!\n");
109 free(index_path_name);
110 free(text_path_name);
111 return cached_indexes[i];
112 }
113
114 i++;
115 }
116
117 /* Text strings no longer needed */
118 free(index_path_name);
119 free(text_path_name);
120
121 /* The index is not cached, so load it now */
122 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
123 if (!qd) {
124 printf("Error: Could not InitQuerySystem()...\n");
125 return NULL;
126 }
127
128 /* The index loaded OK, so cache it */
129 /* This could be a little more sophisticated, eg. replace least frequently used index */
130 if (i >= MAX_INDEXES_CACHED)
131 i = MAX_INDEXES_CACHED - 1;
132
133 /* Free the index being replaced */
134 if (cached_indexes[i] != NULL)
135 FinishQuerySystem(cached_indexes[i]);
136
137 /* Cache the loaded index, and return it */
138 cached_indexes[i] = qd;
139 return cached_indexes[i];
140}
141
142/*
143 * Class: org_greenstone_mg_MGRetrieveWrapper
144 * Method: initIDs
145 * Signature: ()V
146 */
147JNIEXPORT void JNICALL
148Java_org_greenstone_mg_MGRetrieveWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
149{
150 /* a long-"J" */
151 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
152 assert(FID_mg_data != NULL);
153
154}
155
156/*
157 * Class: org_greenstone_mg_MGRetrieveWrapper
158 * Method: initCSide
159 * Signature: ()Z
160 */
161JNIEXPORT jboolean JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_initCSide
162(JNIEnv *j_env, jobject j_obj)
163{
164 /* Allocate a MGWrapperData object to store query parameters */
165 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
166 assert(data != NULL);
167
168 /* Set default values - no stemming, no case-folding, boolean OR queries */
169 data->defaultStemMethod = 0;
170 data->defaultBoolCombine = 0;
171
172 /* Allocate a QueryInfo object to store more query parameters */
173 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
174 assert(data->queryInfo != NULL);
175
176 /* Set default values - 50 documents max, return term freqs, sort by rank */
177 data->queryInfo->index = NULL;
178 data->queryInfo->maxDocs = 50;
179 data->queryInfo->needTermFreqs = 1;
180
181 /* Save the object on the Java side */
182 (*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
183
184 /* Initialise MG environment variables */
185 InitEnv();
186 SetEnv("expert", "true", NULL);
187 SetEnv("mode", "docnums", NULL);
188
189 return 1; /* true - no errors */
190}
191
192/*
193 * Class: org_greenstone_mg_MGRetrieveWrapper
194 * Method: unloadIndexData
195 * Signature: ()Z
196 */
197JNIEXPORT jboolean JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_unloadIndexData
198(JNIEnv* j_env, jobject j_obj)
199{
200 /* Free all the loaded indexes */
201 int i = 0;
202 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
203 FinishQuerySystem(cached_indexes[i]);
204 cached_indexes[i] = NULL;
205 i++;
206 }
207
208 return 1; /* true - no errors */
209}
210/* Choose MG index to search */
211JNIEXPORT void JNICALL
212Java_org_greenstone_mg_MGRetrieveWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
213 jstring j_index)
214{
215 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
216
217 /* Get the index name as a C string */
218 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
219 assert(index != NULL);
220 printf("Choosing index %s...\n", index);
221
222 /* Free the previous index name */
223 if (data->queryInfo->index)
224 free(data->queryInfo->index);
225
226 /* Allocate memory for the index name, and fill it */
227 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
228 assert(data->queryInfo->index != NULL);
229 strcpy(data->queryInfo->index, index);
230
231 /* Release the index string */
232 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
233}
234
235/*
236 * Class: org_greenstone_mg_MGRetrieveWrapper
237 * Method: getDocument
238 * Signature: (Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/String;
239 */
240JNIEXPORT jstring JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_getDocument
241(JNIEnv *j_env, jobject j_obj,
242 jstring j_base_dir, jstring j_text_path,
243 jlong j_docnum)
244{
245 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
246
247 char* index_path;
248 const char* base_dir;
249 const char* text_path;
250 query_data* qd;
251
252 mg_u_long pos, len;
253 u_char* c_buffer = NULL;
254 u_char* uc_buffer = NULL;
255 int ULen;
256
257 jstring result;
258
259 /* Make sure an index has been specified */
260 index_path = data->queryInfo->index;
261 assert(index_path != NULL);
262
263 /* Obtain C versions of the two string parameters */
264 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
265 if (base_dir == NULL) {
266 return NULL;
267 }
268 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
269 if (text_path == NULL) {
270 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
271 return NULL;
272 }
273
274 /* Load the appropriate index for satisfying this request */
275 printf("Document retrieval, index path: %s\n", index_path);
276 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
277
278 /* The C text strings are no longer needed */
279 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
280 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
281
282 /* Check that the index was loaded successfully */
283 if (qd==NULL) {
284 return NULL;
285 }
286 /*assert(qd != NULL);*/
287
288 /* Get the document position and length in the text file */
289 printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
290 FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
291 printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
292
293 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
294 c_buffer = (u_char*) malloc(len);
295 assert(c_buffer != NULL);
296 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
297 assert(uc_buffer != NULL);
298
299 /* Seek to the correct position in the file and read the document text */
300 Fseek (qd->td->TextFile, pos, 0);
301 Fread (c_buffer, 1, len, qd->td->TextFile);
302
303 /* Decompress the document text into another buffer, and terminate it */
304 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
305 uc_buffer[ULen] = '\0';
306
307 /* Load the document text into a Java string */
308 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
309 assert(result != NULL);
310
311 /* Free C buffers */
312 free(c_buffer);
313 free(uc_buffer);
314
315 /* Return the document text */
316 return result;
317}
318
Note: See TracBrowser for help on using the repository browser.