root/main/trunk/greenstone2/common-src/indexers/mg/jni/MGWrapperImpl.c @ 25244

Revision 25244, 24.6 KB (checked in by ak19, 8 years ago)

GS3 tomcat server crashes because java crashes owing to some error in the JNI code. The error may be related to pointers having been stored as int rather than long, an issue that's become noticeable on 64-bit Linux machines. Changes have been made in the JNI code where these pointers that are transferred between Java and C++ code are stored (GetIntField and SetIntField changed to GetLongField and SetLongField, as well as declaration of data_ptr as jlong not jint). Committing code first without debug statements so the commits can easily be done separately.

  • Property svn:keywords set to Author Date Id Revision
Line 
1/*
2 *    MGWrapperImpl.c
3 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 *    This program is free software; you can redistribute it and/or modify
6 *    it under the terms of the GNU General Public License as published by
7 *    the Free Software Foundation; either version 2 of the License, or
8 *    (at your option) any later version.
9 *
10 *    This program is distributed in the hope that it will be useful,
11 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 *    GNU General Public License for more details.
14 *
15 *    You should have received a copy of the GNU General Public License
16 *    along with this program; if not, write to the Free Software
17 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"
#include <jni.h>
#include <stdint.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30/*************************************************************************
31   NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35   - Sorting results by rank.
36     Done here as a post-processing operation. Could be more efficient in
37     some cases: the current solution is not very good when the number of
38     matching documents is large and the number of desired matches is
39     small. In this case it would be better to iterate through the array
40     picking out the best documents rather than sorting them all.
41
42   - Asking for query term frequencies to be returned.
43     This cannot be turned off in MG. If the term frequencies are not
44     required, they are simply not passed back to the Java side.
45
46   - Choosing the index for queries.
47     It is possible for MG to build Section and Document indexes (for
48     example), but these are two separate indexes, and must be loaded
49     separately. This module can load more than one index at a time, thus
50     processing queries to different indexes more quickly.
51     NOTE: This replaces TWO options in the MGPP version: returnLevel and
52           queryLevel.
53
54 *************************************************************************/
55
56
57#define MAX_INDEXES_CACHED 3
58
59/* copied from mgquery, needed to convert paragraph numbers to document numbers
60   for greenstone */
61#if defined(PARADOCNUM) ||  defined(NZDL)
62static int GetDocNumFromParaNum(query_data *qd, int paranum) {
63  int Documents = qd->td->cth.num_of_docs;
64  int *Paragraph = qd->paragraph;
65  int low = 1, high = Documents;
66  int mid = (low+high)/2;
67
68  while ((mid = (low+high)/2) >=1 && mid <= Documents)
69    {
70      if (paranum > Paragraph[mid])
71        low = mid+1;
72      else if (paranum <= Paragraph[mid-1])
73        high = mid-1;
74      else
75        return mid;
76    }
77  FatalError(1, "Bad paragraph number.\n");
78  return 0;
79}
80#endif
81
82
83/*********************************************
84   initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110  jclass JC_MGQueryResult;
111
112  /* a long-"J" */
113  FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114  assert(FID_mg_data != NULL);
115
116  /* an object -"L<class name>;" */
117  FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118                      "Lorg/greenstone/mg/MGQueryResult;");
119  assert(FID_query_result != NULL);
120
121  /* the methods we want to use */
122  JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124  /* addDoc(long doc, float rank) */
125  MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126  assert(MID_addDoc != NULL);
127
128  /* addTerm(String term, int stem) */
129  MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130  assert(MID_addTerm != NULL);
131
132  /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133  MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134  assert(MID_addEquivTerm != NULL);
135
136  /* setTotalDocs(long) */
137  MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138  assert(MID_setTotalDocs != NULL);
139
140  /* clear(void) */
141  MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142  assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147   initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152  /* Allocate a MGWrapperData object to store query parameters */
153  MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154  assert(data != NULL);
155
156  /* Set default values - no stemming, no case-folding, boolean OR queries */
157  data->defaultStemMethod = 0;
158  data->defaultBoolCombine = 0;
159
160  /* Allocate a QueryInfo object to store more query parameters */
161  data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162  assert(data->queryInfo != NULL);
163
164  /* Set default values - 50 documents max, return term freqs, sort by rank */
165  data->queryInfo->index = NULL;
166  data->queryInfo->maxDocs = 50;
167  data->queryInfo->needTermFreqs = 1;
168
169  /* Save the object on the Java side */
170  (*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
171
172  /* Initialise MG environment variables */
173  InitEnv();
174  SetEnv("expert", "true", NULL);
175  SetEnv("mode", "docnums", NULL);
176 
177  return 1;  /* true - no errors */
178}
179
180
181/*******************************************
182   Index caching
183 *******************************************/
184
185query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
186
187
188/* Get the index data necessary to perform a query or document retrieval */
189query_data*
190loadIndexData(char* base_dir, char* index_path, char* text_path)
191{
192  char* index_path_name;
193  char* text_path_name;
194  query_data* qd;
195  int i = 0;
196
197  /* Form the path name of the desired indexes */
198  index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
199  assert(index_path_name != NULL);
200  strcpy(index_path_name, base_dir);
201  strcat(index_path_name, index_path);
202  printf("Index pathname: %s\n", index_path_name);
203
204  text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
205  assert(text_path_name != NULL);
206  strcpy(text_path_name, base_dir);
207  strcat(text_path_name, text_path);
208  printf("Text pathname: %s\n", text_path_name);
209
210  /* Search through the cached indexes for the desired one */
211  while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
212    printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
213    printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
214
215    /* Check if the index has already been loaded */
216    if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
217    (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
218      /* Index has already been loaded and cached, so return it */
219      printf("Found index!\n");
220      free(index_path_name);
221      free(text_path_name);
222      return cached_indexes[i];
223    }
224
225    i++;
226  }
227
228  /* Text strings no longer needed */
229  free(index_path_name);
230  free(text_path_name);
231
232  /* The index is not cached, so load it now */
233  qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
234  if (!qd) {
235    printf("Error: Could not InitQuerySystem()...\n");
236    return NULL;
237  }
238
239  /* The index loaded OK, so cache it */
240  /* This could be a little more sophisticated, eg. replace least frequently used index */
241  if (i >= MAX_INDEXES_CACHED)
242    i = MAX_INDEXES_CACHED - 1;
243
244  /* Free the index being replaced */
245  if (cached_indexes[i] != NULL)
246    FinishQuerySystem(cached_indexes[i]);
247
248  /* Cache the loaded index, and return it */
249  cached_indexes[i] = qd;
250  return cached_indexes[i];
251}
252
253
254/* Clean up by unloading all cached indexes */
255JNIEXPORT jboolean JNICALL
256Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
257{
258  /* Free all the loaded indexes */
259  int i = 0;
260  while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
261    FinishQuerySystem(cached_indexes[i]);
262    cached_indexes[i] = NULL;
263    i++;
264  }
265
266  return 1;  /* true - no errors */
267}
268
269
270/****************************************************
271   retrieve a document
272 ****************************************************/
273
274/* Returns a document from mg as a string */
275JNIEXPORT jstring JNICALL
276Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
277                         jstring j_base_dir, jstring j_text_path,
278                         jlong j_docnum)
279{
280  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
281
282  char* index_path;
283  const char* base_dir;
284  const char* text_path;
285  query_data* qd;
286
287  mg_u_long pos, len;
288  u_char* c_buffer = NULL;
289  u_char* uc_buffer = NULL;
290  int ULen;
291
292  jstring result;
293
294  /* Make sure an index has been specified */
295  index_path = data->queryInfo->index;
296  assert(index_path != NULL);
297
298  /* Obtain C versions of the two string parameters */
299  base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
300  if (base_dir == NULL) {
301    return NULL;
302  }
303  text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
304  if (text_path == NULL) {
305    (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
306    return NULL;
307  }
308
309  /* Load the appropriate index for satisfying this request */
310  printf("Document retrieval, index path: %s\n", index_path);
311  qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
312
313  /* The C text strings are no longer needed */
314  (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
315  (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
316
317  /* Check that the index was loaded successfully */
318  if (qd==NULL) {
319    return NULL;
320  }
321  /*assert(qd != NULL);*/
322
323  /* Get the document position and length in the text file */
324  printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
325  FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
326  printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
327
328  /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
329  c_buffer = (u_char*) malloc(len);
330  assert(c_buffer != NULL);
331  uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
332  assert(uc_buffer != NULL);
333
334  /* Seek to the correct position in the file and read the document text */
335  Fseek (qd->td->TextFile, pos, 0);
336  Fread (c_buffer, 1, len, qd->td->TextFile);
337
338  /* Decompress the document text into another buffer, and terminate it */
339  DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
340  uc_buffer[ULen] = '\0';
341
342  /* Load the document text into a Java string */
343  result = (*j_env)->NewStringUTF(j_env, uc_buffer);
344  assert(result != NULL);
345
346  /* Free C buffers */
347  free(c_buffer);
348  free(uc_buffer);
349
350  /* Return the document text */
351  return result;
352}
353
354
355/*******************************************
356   do a query
357 *******************************************/
358
359/* do the actual query - the results are written to query_result held on the Java side */
360JNIEXPORT void JNICALL
361Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
362                      jstring j_base_dir, jstring j_text_path,
363                      jstring j_query)
364{
365  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
366
367  char* index_path;
368  const char* base_dir;
369  const char* text_path;
370  query_data* qd;
371
372  jobject result_ptr;
373  char* query;
374  int i, j;
375
376  jthrowable exc;
377  /* First of all, clear the previous result */
378  /* The result to write to */
379  result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
380  assert(result_ptr != NULL);
381
382  /* Clear any previous query results */
383  (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
384  exc = (*j_env)->ExceptionOccurred(j_env);
385  if (exc) {
386    (*j_env)->ExceptionDescribe(j_env);
387    return;
388  }
389
390  /* Make sure an index has been specified */
391  index_path = data->queryInfo->index;
392  if (index_path == NULL) {
393    return;
394  }
395
396  /* Obtain C versions of the two string parameters */
397  base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
398  if (base_dir == NULL) {
399    return;
400  }
401  text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
402  if (text_path == NULL) {
403    (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
404    return;
405  }
406
407  /* Load the appropriate index for satisfying this request */
408  qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
409
410  /* The C text strings are no longer needed */
411  (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
412  (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
413
414  /* Check that the index was loaded successfully */
415  if (qd == NULL) {
416    return;
417  }
418 
419  /* Remove anything hanging around from last time */
420  FreeQueryDocs(qd);
421
422  /* Obtain a C version of the query string */
423  query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
424  if (query == NULL) {
425    return;
426  }
427  printf("Searching for query \"%s\"...\n", query);
428
429  /* Make sure the query isn't empty */
430  if (strlen(query) == 0) {
431    printf("Warning: Empty query.\n");
432    return;
433  }
434
435  /* "Some" queries are done as ranked queries */
436  if (data->defaultBoolCombine == 0) {
437    RankedQueryInfo rqi;
438    rqi.QueryFreqs = 1;  /* Use the frequency of each query term in the query - OK? */
439    rqi.Exact = 1;  /* Perform exact ranking */
440    rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;  /* Get only the desired number */
441    rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve;  /* OK? */
442    /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
443    if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
444      rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
445    }
446   
447    rqi.Sort = 1;  /* Sort the query terms by frequency before ranking */
448    rqi.AccumMethod = 'L';  /* Use a list when accumulating (has bugs though...) */
449    /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
450    rqi.MaxAccums = 100000;
451    rqi.MaxTerms = -1;  /* Use all the query terms */
452    /* rqi.StopAtMaxAccum = 0;*/  /* Don't care (using as many accumulators as necessary) */
453    rqi.StopAtMaxAccum = 1;
454    rqi.HashTblSize = 1000;  /* Don't care (not using a hash table) */
455    rqi.skip_dump = NULL;  /* Don't dump skip information */
456
457    /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
458    SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
459    SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
460
461    RankedQuery(qd, query, &rqi);
462  }
463  /* "All" queries are done as boolean queries */
464  else {
465    BooleanQueryInfo bqi;
466    bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
467
468    /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
469    BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
470  }
471
472  /* Finished with the C query string */
473  (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
474
475  /* Check the query was processed successfully */
476  if (qd->DL == NULL || qd->QTL == NULL || qd->TL == NULL) {
477    return;
478  }
479
480  /* Record the total number of matching documents */
481  (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
482  exc = (*j_env)->ExceptionOccurred(j_env);
483  if (exc) {
484    (*j_env)->ExceptionDescribe(j_env);
485    return;
486  }
487
488  /* Record the matching documents, but only the number requested */
489  printf("Number of matching documents: %d\n", qd->DL->num);
490 
491  for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
492    int doc_num = qd->DL->DE[i].DocNum;
493    float doc_weight = qd->DL->DE[i].Weight;
494
495#if defined(PARADOCNUM) || defined(NZDL)
496    if (qd->id->ifh.InvfLevel == 3) {
497      /* pararaph level, need to convert to doc level*/
498      doc_num = GetDocNumFromParaNum(qd, doc_num);
499    }
500#endif
501   
502   
503    /* Call the addDoc function (Java side) to record a matching document */
504    (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
505                 (jlong) doc_num, (jfloat) doc_weight);
506    exc = (*j_env)->ExceptionOccurred(j_env);
507    if (exc) {
508      (*j_env)->ExceptionDescribe(j_env);
509      return;
510    }
511  }
512
513  /* Record the term information, if desired */
514  if (data->queryInfo->needTermFreqs) {
515    /* The following code is a lot more complicated than it could be, but it is necessary
516       to compensate for an oddity in MG. */
517    unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
518
519    printf("Number of terms: %d\n", qd->TL->num);
520    printf("Number of query terms: %d\n", qd->QTL->num);
521
522    /* Generate the stemmed form of each of the relevant terms */
523    for (i = 0; i < qd->TL->num; i++) {
524      u_char* raw_term = qd->TL->TE[i].Word;
525      unsigned int term_length = raw_term[0];
526
527      u_char* raw_stemmed_term = malloc(term_length + 1);
528      unsigned int stemmed_term_length;
529
530      /* Copy the term, and stem it */
531      for (j = 0; j <= term_length; j++)
532    raw_stemmed_term[j] = raw_term[j];
533      stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
534
535      /* Allocate memory to store the stemmed term, and fill it */
536      stemmed_term_length = raw_stemmed_term[0];
537      stemmed_terms[i] = malloc(stemmed_term_length + 1);
538      assert(stemmed_terms[i] != NULL);
539      strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
540      stemmed_terms[i][stemmed_term_length] = '\0';
541    }
542
543    /* Record every query term, along with their equivalent terms */
544    for (i = 0; i < qd->QTL->num; i++) {
545      u_char* raw_query_term = qd->QTL->QTE[i].Term;
546      unsigned int query_term_length = raw_query_term[0];
547      unsigned char* query_term;
548      jstring j_query_term;
549
550      u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
551      unsigned int stemmed_query_term_length;
552      unsigned char* stemmed_query_term;
553
554      /* Allocate memory to store the query term, and fill it */
555      query_term = malloc(query_term_length + 1);
556      assert(query_term != NULL);
557      strncpy(query_term, &(raw_query_term[1]), query_term_length);
558      query_term[query_term_length] = '\0';
559
560      /* Allocate a new jstring for the query term */
561      j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
562      assert(j_query_term != NULL);
563
564      /* Call the addTerm function (Java side) to record the query term */
565      (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
566                   j_query_term, (jint) data->defaultStemMethod);
567      exc = (*j_env)->ExceptionOccurred(j_env);
568      if (exc) {
569    (*j_env)->ExceptionDescribe(j_env);
570    return;
571      }
572
573      /* Copy the query term, and stem it */
574      for (j = 0; j <= query_term_length; j++)
575    raw_stemmed_query_term[j] = raw_query_term[j];
576      stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
577
578      /* Allocate memory to store the stemmed query term, and fill it */
579      stemmed_query_term_length = raw_stemmed_query_term[0];
580      stemmed_query_term = malloc(stemmed_query_term_length + 1);
581      assert(stemmed_query_term != NULL);
582      strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
583      stemmed_query_term[stemmed_query_term_length] = '\0';
584
585      /* Find all the terms equivalent to the query term */
586      for (j = 0; j < qd->TL->num; j++) {
587    /* Check if the stemmed query term matches the stemmed term */
588    if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
589      u_char* raw_term = qd->TL->TE[j].Word;
590      unsigned int term_length = raw_term[0];
591      unsigned char* term;
592      jstring j_term;
593
594      /* Allocate memory to store the query term, and fill it */
595      term = malloc(term_length + 1);
596      assert(term != NULL);
597      strncpy(term, &(raw_term[1]), term_length);
598      term[term_length] = '\0';
599
600      /* Allocate a new jstring for the query term */
601      j_term = (*j_env)->NewStringUTF(j_env, term);
602      assert(j_term != NULL);
603
604      /* Call the addEquivTerm function (Java side) to record the equivalent term */
605      (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
606                   j_query_term, j_term,
607                   (jlong) qd->TL->TE[j].WE.doc_count,
608                   (jlong) qd->TL->TE[j].WE.count);
609      exc = (*j_env)->ExceptionOccurred(j_env);
610      if (exc) {
611        (*j_env)->ExceptionDescribe(j_env);
612        return;
613      }
614    }
615      }
616    }
617  }
618}
619
620
621/*******************************************
622   set query options
623 *******************************************/
624
625/* Turn casefolding on or off */
626JNIEXPORT void JNICALL
627Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
628                     jboolean j_on)
629{
630  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
631
632  if (j_on) {
633    data->defaultStemMethod |= 1;
634  } else {
635    data->defaultStemMethod &= 0xe;
636  }
637}
638
639
640/* Turn stemming on or off */
641JNIEXPORT void JNICALL
642Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
643                     jboolean j_on)
644{
645  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
646
647  if (j_on) {
648    data->defaultStemMethod |= 2;
649  } else {
650    data->defaultStemMethod &= 0xd;
651  }
652}
653
654
655/* Set the maximum number of documents to return from a query */
656JNIEXPORT void JNICALL
657Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
658                        jint j_max)
659{
660  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
661  data->queryInfo->maxDocs = j_max;
662}
663
664/* set the maximum number of numeric to split*/
665JNIEXPORT void JNICALL
666Java_org_greenstone_mg_MGWrapper_setMaxNumeric (JNIEnv *j_env,
667                         jobject j_obj,
668                         jint j_max) {
669
670  char text[20];
671  char* maxnumeric;
672  sprintf(text,"%d",j_max);
673  maxnumeric = text; 
674  SetEnv("maxnumeric",maxnumeric, NULL);
675}
676
677
678/* Turn term frequency recording on or off */
679JNIEXPORT void JNICALL
680Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
681                        jboolean j_on)
682{
683  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
684  data->queryInfo->needTermFreqs = j_on;
685}
686
687
688/* Choose MG index to search */
689JNIEXPORT void JNICALL
690Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
691                      jstring j_index)
692{
693  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
694
695  /* Get the index name as a C string */
696  const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
697  assert(index != NULL);
698  printf("Choosing index %s...\n", index);
699
700  /* Free the previous index name */
701  if (data->queryInfo->index)
702    free(data->queryInfo->index);
703
704  /* Allocate memory for the index name, and fill it */
705  data->queryInfo->index = (char*) malloc(strlen(index) + 1);
706  assert(data->queryInfo->index != NULL);
707  strcpy(data->queryInfo->index, index);
708
709  /* Release the index string */
710  (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
711}
712
713
714/* Choose boolean AND or boolean OR queries */
715JNIEXPORT void JNICALL
716Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
717                          jint j_mode)
718{
719  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
720  data->defaultBoolCombine = j_mode;
721}
722
723
724/* Get a text representation of the current parameter values */
725JNIEXPORT jstring JNICALL
726Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
727{
728  MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
729  char result[512];  /* Assume this is big enough */
730
731  /* Print the data to a character array */
732  sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
733      (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
734      (data->defaultStemMethod & 1),
735      (data->defaultStemMethod & 2),
736      (data->defaultBoolCombine == 1 ? "all" : "some"),
737      (data->queryInfo->maxDocs));
738
739  /* Convert to a jstring, and return */
740  return (*j_env)->NewStringUTF(j_env, result);
741}
Note: See TracBrowser for help on using the browser.