root/main/trunk/greenstone2/common-src/indexers/mg/jni/MGRetrieveWrapperImpl.c @ 25244

Revision 25244, 9.8 KB (checked in by ak19, 8 years ago)

GS3 tomcat server crashes because java crashes owing to some error in the JNI code. The error may be related to pointers having been stored as int rather than long, an issue that's become noticeable on 64 bit linux machines. Changes have been made in the JNI code where these pointers that are transferred between Java and C++ code are stored (GetIntField and SetIntField changed to GetLongField and SetLongField, as well as declaration of data_ptr as jlong not jint). Committing code first without debug statements so the commits can easily be done separately.

Line 
1/*
2 *    MGRetrieveWrapperImpl.c
3 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 *    This program is free software; you can redistribute it and/or modify
6 *    it under the terms of the GNU General Public License as published by
7 *    the Free Software Foundation; either version 2 of the License, or
8 *    (at your option) any later version.
9 *
10 *    This program is distributed in the hope that it will be useful,
11 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 *    GNU General Public License for more details.
14 *
15 *    You should have received a copy of the GNU General Public License
16 *    along with this program; if not, write to the Free Software
17 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"
#include <stdint.h>
#include <jni.h>
#include "org_greenstone_mg_MGRetrieveWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30#define MAX_INDEXES_CACHED 3
31
32/* copied from mgquery, needed to convert paragraph numbers to document numbers
33   for greenstone */
34#if defined(PARADOCNUM) ||  defined(NZDL)
35static int GetDocNumFromParaNum(query_data *qd, int paranum) {
36  int Documents = qd->td->cth.num_of_docs;
37  int *Paragraph = qd->paragraph;
38  int low = 1, high = Documents;
39  int mid = (low+high)/2;
40
41  while ((mid = (low+high)/2) >=1 && mid <= Documents)
42    {
43      if (paranum > Paragraph[mid])
44        low = mid+1;
45      else if (paranum <= Paragraph[mid-1])
46        high = mid-1;
47      else
48        return mid;
49    }
50  FatalError(1, "Bad paragraph number.\n");
51  return 0;
52}
53#endif
54
55
56/*********************************************
57   initialisation stuff
58 *********************************************/
59
60/* cached ids for java stuff */
61jfieldID FID_mg_data = NULL; /* MGWrapperData */
62jfieldID FID_query_result = NULL; /* MGQueryResult */
63jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
64jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
65jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
66jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
67jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
68
69
70/*******************************************
71   Index caching
72 *******************************************/
73
74query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
75
76
77/* Get the index data necessary to perform a query or document retrieval */
78query_data*
79loadIndexData(char* base_dir, char* index_path, char* text_path)
80{
81  char* index_path_name;
82  char* text_path_name;
83  query_data* qd;
84  int i = 0;
85
86  /* Form the path name of the desired indexes */
87  index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
88  assert(index_path_name != NULL);
89  strcpy(index_path_name, base_dir);
90  strcat(index_path_name, index_path);
91  printf("Index pathname: %s\n", index_path_name);
92
93  text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
94  assert(text_path_name != NULL);
95  strcpy(text_path_name, base_dir);
96  strcat(text_path_name, text_path);
97  printf("Text pathname: %s\n", text_path_name);
98
99  /* Search through the cached indexes for the desired one */
100  while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
101    printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
102    printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
103
104    /* Check if the index has already been loaded */
105    if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
106    (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
107      /* Index has already been loaded and cached, so return it */
108      printf("Found index!\n");
109      free(index_path_name);
110      free(text_path_name);
111      return cached_indexes[i];
112    }
113
114    i++;
115  }
116
117  /* Text strings no longer needed */
118  free(index_path_name);
119  free(text_path_name);
120
121  /* The index is not cached, so load it now */
122  qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
123  if (!qd) {
124    printf("Error: Could not InitQuerySystem()...\n");
125    return NULL;
126  }
127
128  /* The index loaded OK, so cache it */
129  /* This could be a little more sophisticated, eg. replace least frequently used index */
130  if (i >= MAX_INDEXES_CACHED)
131    i = MAX_INDEXES_CACHED - 1;
132
133  /* Free the index being replaced */
134  if (cached_indexes[i] != NULL)
135    FinishQuerySystem(cached_indexes[i]);
136
137  /* Cache the loaded index, and return it */
138  cached_indexes[i] = qd;
139  return cached_indexes[i];
140}
141
142/*
143 * Class:     org_greenstone_mg_MGRetrieveWrapper
144 * Method:    initIDs
145 * Signature: ()V
146 */
147JNIEXPORT void JNICALL
148Java_org_greenstone_mg_MGRetrieveWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
149{
150  /* a long-"J" */
151  FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
152  assert(FID_mg_data != NULL);
153
154}
155
156/*
157 * Class:     org_greenstone_mg_MGRetrieveWrapper
158 * Method:    initCSide
159 * Signature: ()Z
160 */
161JNIEXPORT jboolean JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_initCSide
162(JNIEnv *j_env, jobject j_obj)
163{
164  /* Allocate a MGWrapperData object to store query parameters */
165  MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
166  assert(data != NULL);
167
168  /* Set default values - no stemming, no case-folding, boolean OR queries */
169  data->defaultStemMethod = 0;
170  data->defaultBoolCombine = 0;
171
172  /* Allocate a QueryInfo object to store more query parameters */
173  data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
174  assert(data->queryInfo != NULL);
175
176  /* Set default values - 50 documents max, return term freqs, sort by rank */
177  data->queryInfo->index = NULL;
178  data->queryInfo->maxDocs = 50;
179  data->queryInfo->needTermFreqs = 1;
180
181  /* Save the object on the Java side */
182  (*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
183
184  /* Initialise MG environment variables */
185  InitEnv();
186  SetEnv("expert", "true", NULL);
187  SetEnv("mode", "docnums", NULL);
188 
189  return 1;  /* true - no errors */
190}
191
192/*
193 * Class:     org_greenstone_mg_MGRetrieveWrapper
194 * Method:    unloadIndexData
195 * Signature: ()Z
196 */
197JNIEXPORT jboolean JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_unloadIndexData
198(JNIEnv* j_env, jobject j_obj)
199{
200  /* Free all the loaded indexes */
201  int i = 0;
202  while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
203    FinishQuerySystem(cached_indexes[i]);
204    cached_indexes[i] = NULL;
205    i++;
206  }
207
208  return 1;  /* true - no errors */
209}
210/* Choose MG index to search */
211JNIEXPORT void JNICALL
212Java_org_greenstone_mg_MGRetrieveWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
213                      jstring j_index)
214{
215  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
216
217  /* Get the index name as a C string */
218  const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
219  assert(index != NULL);
220  printf("Choosing index %s...\n", index);
221
222  /* Free the previous index name */
223  if (data->queryInfo->index)
224    free(data->queryInfo->index);
225
226  /* Allocate memory for the index name, and fill it */
227  data->queryInfo->index = (char*) malloc(strlen(index) + 1);
228  assert(data->queryInfo->index != NULL);
229  strcpy(data->queryInfo->index, index);
230
231  /* Release the index string */
232  (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
233}
234
235/*
236 * Class:     org_greenstone_mg_MGRetrieveWrapper
237 * Method:    getDocument
238 * Signature: (Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/String;
239 */
240JNIEXPORT jstring JNICALL Java_org_greenstone_mg_MGRetrieveWrapper_getDocument
241(JNIEnv *j_env, jobject j_obj,
242                         jstring j_base_dir, jstring j_text_path,
243                         jlong j_docnum)
244{
245  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
246
247  char* index_path;
248  const char* base_dir;
249  const char* text_path;
250  query_data* qd;
251
252  mg_u_long pos, len;
253  u_char* c_buffer = NULL;
254  u_char* uc_buffer = NULL;
255  int ULen;
256
257  jstring result;
258
259  /* Make sure an index has been specified */
260  index_path = data->queryInfo->index;
261  assert(index_path != NULL);
262
263  /* Obtain C versions of the two string parameters */
264  base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
265  if (base_dir == NULL) {
266    return NULL;
267  }
268  text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
269  if (text_path == NULL) {
270    (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
271    return NULL;
272  }
273
274  /* Load the appropriate index for satisfying this request */
275  printf("Document retrieval, index path: %s\n", index_path);
276  qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
277
278  /* The C text strings are no longer needed */
279  (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
280  (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
281
282  /* Check that the index was loaded successfully */
283  if (qd==NULL) {
284    return NULL;
285  }
286  /*assert(qd != NULL);*/
287
288  /* Get the document position and length in the text file */
289  printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
290  FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
291  printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
292
293  /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
294  c_buffer = (u_char*) malloc(len);
295  assert(c_buffer != NULL);
296  uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
297  assert(uc_buffer != NULL);
298
299  /* Seek to the correct position in the file and read the document text */
300  Fseek (qd->td->TextFile, pos, 0);
301  Fread (c_buffer, 1, len, qd->td->TextFile);
302
303  /* Decompress the document text into another buffer, and terminate it */
304  DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
305  uc_buffer[ULen] = '\0';
306
307  /* Load the document text into a Java string */
308  result = (*j_env)->NewStringUTF(j_env, uc_buffer);
309  assert(result != NULL);
310
311  /* Free C buffers */
312  free(c_buffer);
313  free(uc_buffer);
314
315  /* Return the document text */
316  return result;
317}
318
Note: See TracBrowser for help on using the browser.