source: main/trunk/greenstone2/common-src/indexers/mg/jni/MGWrapperImpl.c@ 25244

Last change on this file since 25244 was 25244, checked in by ak19, 12 years ago

GS3 tomcat server crashes because java crashes owing to some error in the JNI code. The error may be related to pointers having been stored as int rather than long, an issue that's become noticeable on 64 bit linux machines. Changes have been made in the JNI code where these pointers that are transferred between Java and C++ code are stored (GetIntField and SetIntField to GetLongField and SetLongField, as well as declaration of data_ptr as jlong not jint). Committing code first without debug statements so the commits can easily be done separately.

  • Property svn:keywords set to Author Date Id Revision
File size: 24.6 KB
RevLine 
[3743]1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
21#include "MGWrapperImpl.h"
22#include <jni.h>
23#include "org_greenstone_mg_MGWrapper.h"
24
25#include "backend.h"
26#include "environment.h"
[7629]27#include "text_get.h"
28#include "stemmer.h"
[3743]29
30/*************************************************************************
31 NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35 - Sorting results by rank.
36 Done here as a post-processing operation. Could be more efficient in
37 some cases: the current solution is not very good when the number of
38 matching documents is large and the number of desired matches is
39 small. In this case it would be better to iterate through the array
40 picking out the best documents rather than sorting them all.
41
42 - Asking for query term frequencies to be returned.
43 This cannot be turned off in MG. If the term frequencies are not
[4714]44 required, they are simply not passed back to the Java side.
[3743]45
46 - Choosing the index for queries.
47 It is possible for MG to build Section and Document indexes (for
48 example), but these are two separate indexes, and must be loaded
49 separately. This module can load more than one index at a time, thus
50 processing queries to different indexes more quickly.
51 NOTE: This replaces TWO options in the MGPP version: returnLevel and
52 queryLevel.
53
54 *************************************************************************/
55
56
[3981]57#define MAX_INDEXES_CACHED 3
[3743]58
/* Copied from mgquery: converts a paragraph number into the number of the
   document that contains it, as needed by Greenstone.

   qd->paragraph is binary-searched as a table in which document d covers the
   paragraph numbers p with Paragraph[d-1] < p <= Paragraph[d], for d in
   1..qd->td->cth.num_of_docs (the search presumes the table is ascending).

   Returns the document number. When no document covers paranum, FatalError()
   is called; 0 is returned should FatalError() come back. */
#if defined(PARADOCNUM) || defined(NZDL)
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  /* Stopping once low passes high is essential: for a paragraph number above
     Paragraph[Documents] the search ends up with low == Documents + 1 and
     high == Documents, whose midpoint is still a valid document number, so
     bounding the midpoint alone would never terminate. */
  while (low <= high) {
    int mid = low + (high - low) / 2;

    if (paranum > Paragraph[mid])
      low = mid + 1;
    else if (paranum <= Paragraph[mid - 1])
      high = mid - 1;
    else
      return mid;
  }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
81
82
[3743]83/*********************************************
84 initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
[3791]92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
[3743]93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110 jclass JC_MGQueryResult;
111
112 /* a long-"J" */
113 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114 assert(FID_mg_data != NULL);
115
116 /* an object -"L<class name>;" */
117 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118 "Lorg/greenstone/mg/MGQueryResult;");
119 assert(FID_query_result != NULL);
120
121 /* the methods we want to use */
122 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124 /* addDoc(long doc, float rank) */
125 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126 assert(MID_addDoc != NULL);
127
[3791]128 /* addTerm(String term, int stem) */
129 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
[3743]130 assert(MID_addTerm != NULL);
131
[3791]132 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134 assert(MID_addEquivTerm != NULL);
135
[3743]136 /* setTotalDocs(long) */
137 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138 assert(MID_setTotalDocs != NULL);
139
140 /* clear(void) */
141 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142 assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147 initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152 /* Allocate a MGWrapperData object to store query parameters */
153 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154 assert(data != NULL);
155
156 /* Set default values - no stemming, no case-folding, boolean OR queries */
157 data->defaultStemMethod = 0;
158 data->defaultBoolCombine = 0;
159
160 /* Allocate a QueryInfo object to store more query parameters */
161 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162 assert(data->queryInfo != NULL);
163
164 /* Set default values - 50 documents max, return term freqs, sort by rank */
165 data->queryInfo->index = NULL;
166 data->queryInfo->maxDocs = 50;
167 data->queryInfo->needTermFreqs = 1;
168
169 /* Save the object on the Java side */
[25244]170 (*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
[3743]171
172 /* Initialise MG environment variables */
173 InitEnv();
174 SetEnv("expert", "true", NULL);
175 SetEnv("mode", "docnums", NULL);
[13288]176
[3743]177 return 1; /* true - no errors */
178}
179
180
181/*******************************************
182 Index caching
183 *******************************************/
184
185query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
186
187
188/* Get the index data necessary to perform a query or document retrieval */
189query_data*
190loadIndexData(char* base_dir, char* index_path, char* text_path)
191{
192 char* index_path_name;
193 char* text_path_name;
194 query_data* qd;
195 int i = 0;
196
197 /* Form the path name of the desired indexes */
198 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
199 assert(index_path_name != NULL);
200 strcpy(index_path_name, base_dir);
201 strcat(index_path_name, index_path);
202 printf("Index pathname: %s\n", index_path_name);
203
204 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
205 assert(text_path_name != NULL);
206 strcpy(text_path_name, base_dir);
207 strcat(text_path_name, text_path);
208 printf("Text pathname: %s\n", text_path_name);
209
210 /* Search through the cached indexes for the desired one */
211 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
212 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
213 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
214
215 /* Check if the index has already been loaded */
216 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
217 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
218 /* Index has already been loaded and cached, so return it */
219 printf("Found index!\n");
220 free(index_path_name);
221 free(text_path_name);
222 return cached_indexes[i];
223 }
224
225 i++;
226 }
227
228 /* Text strings no longer needed */
229 free(index_path_name);
230 free(text_path_name);
231
232 /* The index is not cached, so load it now */
233 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
234 if (!qd) {
235 printf("Error: Could not InitQuerySystem()...\n");
236 return NULL;
237 }
238
239 /* The index loaded OK, so cache it */
240 /* This could be a little more sophisticated, eg. replace least frequently used index */
241 if (i >= MAX_INDEXES_CACHED)
242 i = MAX_INDEXES_CACHED - 1;
243
244 /* Free the index being replaced */
245 if (cached_indexes[i] != NULL)
246 FinishQuerySystem(cached_indexes[i]);
247
248 /* Cache the loaded index, and return it */
249 cached_indexes[i] = qd;
250 return cached_indexes[i];
251}
252
253
254/* Clean up by unloading all cached indexes */
255JNIEXPORT jboolean JNICALL
256Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
257{
258 /* Free all the loaded indexes */
259 int i = 0;
260 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
261 FinishQuerySystem(cached_indexes[i]);
262 cached_indexes[i] = NULL;
263 i++;
264 }
265
266 return 1; /* true - no errors */
267}
268
269
270/****************************************************
271 retrieve a document
272 ****************************************************/
273
274/* Returns a document from mg as a string */
275JNIEXPORT jstring JNICALL
276Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
277 jstring j_base_dir, jstring j_text_path,
278 jlong j_docnum)
279{
[25244]280 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]281
282 char* index_path;
283 const char* base_dir;
284 const char* text_path;
285 query_data* qd;
286
[25147]287 mg_u_long pos, len;
[3743]288 u_char* c_buffer = NULL;
289 u_char* uc_buffer = NULL;
290 int ULen;
291
292 jstring result;
293
294 /* Make sure an index has been specified */
295 index_path = data->queryInfo->index;
296 assert(index_path != NULL);
297
298 /* Obtain C versions of the two string parameters */
299 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
300 if (base_dir == NULL) {
301 return NULL;
302 }
303 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
304 if (text_path == NULL) {
305 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
306 return NULL;
307 }
308
309 /* Load the appropriate index for satisfying this request */
[3791]310 printf("Document retrieval, index path: %s\n", index_path);
[3743]311 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
312
313 /* The C text strings are no longer needed */
314 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
315 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
316
317 /* Check that the index was loaded successfully */
[9874]318 if (qd==NULL) {
319 return NULL;
320 }
321 /*assert(qd != NULL);*/
[3743]322
323 /* Get the document position and length in the text file */
[25147]324 printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
325 FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
326 printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
[3743]327
328 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
329 c_buffer = (u_char*) malloc(len);
330 assert(c_buffer != NULL);
331 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
332 assert(uc_buffer != NULL);
333
334 /* Seek to the correct position in the file and read the document text */
335 Fseek (qd->td->TextFile, pos, 0);
336 Fread (c_buffer, 1, len, qd->td->TextFile);
337
[3791]338 /* Decompress the document text into another buffer, and terminate it */
[3743]339 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
340 uc_buffer[ULen] = '\0';
341
342 /* Load the document text into a Java string */
343 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
344 assert(result != NULL);
345
346 /* Free C buffers */
347 free(c_buffer);
348 free(uc_buffer);
349
350 /* Return the document text */
351 return result;
352}
353
354
355/*******************************************
356 do a query
357 *******************************************/
358
359/* do the actual query - the results are written to query_result held on the Java side */
360JNIEXPORT void JNICALL
361Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
362 jstring j_base_dir, jstring j_text_path,
363 jstring j_query)
364{
[25244]365 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]366
367 char* index_path;
368 const char* base_dir;
369 const char* text_path;
370 query_data* qd;
371
372 jobject result_ptr;
373 char* query;
374 int i, j;
375
376 jthrowable exc;
[9874]377 /* First of all, clear the previous result */
378 /* The result to write to */
379 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
380 assert(result_ptr != NULL);
381
382 /* Clear any previous query results */
383 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
384 exc = (*j_env)->ExceptionOccurred(j_env);
385 if (exc) {
386 (*j_env)->ExceptionDescribe(j_env);
387 return;
388 }
389
[3743]390 /* Make sure an index has been specified */
391 index_path = data->queryInfo->index;
[11021]392 if (index_path == NULL) {
393 return;
394 }
[3743]395
396 /* Obtain C versions of the two string parameters */
397 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
398 if (base_dir == NULL) {
399 return;
400 }
401 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
402 if (text_path == NULL) {
403 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
404 return;
405 }
406
407 /* Load the appropriate index for satisfying this request */
408 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
409
410 /* The C text strings are no longer needed */
411 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
412 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
413
414 /* Check that the index was loaded successfully */
[9874]415 if (qd == NULL) {
416 return;
417 }
[11021]418
[3743]419 /* Remove anything hanging around from last time */
420 FreeQueryDocs(qd);
421
422 /* Obtain a C version of the query string */
423 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
[11021]424 if (query == NULL) {
425 return;
426 }
[3743]427 printf("Searching for query \"%s\"...\n", query);
428
429 /* Make sure the query isn't empty */
430 if (strlen(query) == 0) {
431 printf("Warning: Empty query.\n");
432 return;
433 }
434
[3791]435 /* "Some" queries are done as ranked queries */
[3743]436 if (data->defaultBoolCombine == 0) {
[3791]437 RankedQueryInfo rqi;
438 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
439 rqi.Exact = 1; /* Perform exact ranking */
440 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
441 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
[8919]442 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
443 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
444 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
445 }
446
[3791]447 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
448 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
[7629]449 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
[3791]450 rqi.MaxAccums = 100000;
451 rqi.MaxTerms = -1; /* Use all the query terms */
[7629]452 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
[3791]453 rqi.StopAtMaxAccum = 1;
454 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
455 rqi.skip_dump = NULL; /* Don't dump skip information */
456
457 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
458 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
459 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
460
461 RankedQuery(qd, query, &rqi);
[3743]462 }
[3791]463 /* "All" queries are done as boolean queries */
464 else {
465 BooleanQueryInfo bqi;
466 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
[3743]467
[3791]468 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
469 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
470 }
[3743]471
472 /* Finished with the C query string */
473 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
474
475 /* Check the query was processed successfully */
[11021]476 if (qd->DL == NULL || qd->QTL == NULL || qd->TL == NULL) {
477 return;
478 }
[3743]479
480 /* Record the total number of matching documents */
481 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
482 exc = (*j_env)->ExceptionOccurred(j_env);
483 if (exc) {
484 (*j_env)->ExceptionDescribe(j_env);
485 return;
486 }
487
488 /* Record the matching documents, but only the number requested */
489 printf("Number of matching documents: %d\n", qd->DL->num);
[8919]490
[3743]491 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
492 int doc_num = qd->DL->DE[i].DocNum;
[8920]493 float doc_weight = qd->DL->DE[i].Weight;
494
[8919]495#if defined(PARADOCNUM) || defined(NZDL)
496 if (qd->id->ifh.InvfLevel == 3) {
497 /* pararaph level, need to convert to doc level*/
498 doc_num = GetDocNumFromParaNum(qd, doc_num);
499 }
500#endif
[8920]501
502
[3743]503 /* Call the addDoc function (Java side) to record a matching document */
504 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
505 (jlong) doc_num, (jfloat) doc_weight);
506 exc = (*j_env)->ExceptionOccurred(j_env);
507 if (exc) {
508 (*j_env)->ExceptionDescribe(j_env);
509 return;
510 }
511 }
512
513 /* Record the term information, if desired */
514 if (data->queryInfo->needTermFreqs) {
[3791]515 /* The following code is a lot more complicated than it could be, but it is necessary
516 to compensate for an oddity in MG. */
517 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
518
[3743]519 printf("Number of terms: %d\n", qd->TL->num);
520 printf("Number of query terms: %d\n", qd->QTL->num);
521
[3791]522 /* Generate the stemmed form of each of the relevant terms */
523 for (i = 0; i < qd->TL->num; i++) {
524 u_char* raw_term = qd->TL->TE[i].Word;
525 unsigned int term_length = raw_term[0];
526
527 u_char* raw_stemmed_term = malloc(term_length + 1);
528 unsigned int stemmed_term_length;
529
530 /* Copy the term, and stem it */
531 for (j = 0; j <= term_length; j++)
532 raw_stemmed_term[j] = raw_term[j];
533 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
534
535 /* Allocate memory to store the stemmed term, and fill it */
536 stemmed_term_length = raw_stemmed_term[0];
537 stemmed_terms[i] = malloc(stemmed_term_length + 1);
538 assert(stemmed_terms[i] != NULL);
539 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
540 stemmed_terms[i][stemmed_term_length] = '\0';
541 }
542
543 /* Record every query term, along with their equivalent terms */
[3743]544 for (i = 0; i < qd->QTL->num; i++) {
[3791]545 u_char* raw_query_term = qd->QTL->QTE[i].Term;
546 unsigned int query_term_length = raw_query_term[0];
[3743]547 unsigned char* query_term;
548 jstring j_query_term;
549
[3791]550 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
551 unsigned int stemmed_query_term_length;
552 unsigned char* stemmed_query_term;
553
554 /* Allocate memory to store the query term, and fill it */
555 query_term = malloc(query_term_length + 1);
[3743]556 assert(query_term != NULL);
[3791]557 strncpy(query_term, &(raw_query_term[1]), query_term_length);
[3743]558 query_term[query_term_length] = '\0';
559
560 /* Allocate a new jstring for the query term */
561 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
562 assert(j_query_term != NULL);
563
[3791]564 /* Call the addTerm function (Java side) to record the query term */
565 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
566 j_query_term, (jint) data->defaultStemMethod);
567 exc = (*j_env)->ExceptionOccurred(j_env);
568 if (exc) {
569 (*j_env)->ExceptionDescribe(j_env);
570 return;
571 }
572
573 /* Copy the query term, and stem it */
574 for (j = 0; j <= query_term_length; j++)
575 raw_stemmed_query_term[j] = raw_query_term[j];
576 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
577
578 /* Allocate memory to store the stemmed query term, and fill it */
579 stemmed_query_term_length = raw_stemmed_query_term[0];
580 stemmed_query_term = malloc(stemmed_query_term_length + 1);
581 assert(stemmed_query_term != NULL);
582 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
583 stemmed_query_term[stemmed_query_term_length] = '\0';
584
585 /* Find all the terms equivalent to the query term */
[3743]586 for (j = 0; j < qd->TL->num; j++) {
[3791]587 /* Check if the stemmed query term matches the stemmed term */
588 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
589 u_char* raw_term = qd->TL->TE[j].Word;
590 unsigned int term_length = raw_term[0];
591 unsigned char* term;
592 jstring j_term;
[3743]593
[3791]594 /* Allocate memory to store the query term, and fill it */
595 term = malloc(term_length + 1);
596 assert(term != NULL);
597 strncpy(term, &(raw_term[1]), term_length);
598 term[term_length] = '\0';
[3743]599
[3791]600 /* Allocate a new jstring for the query term */
601 j_term = (*j_env)->NewStringUTF(j_env, term);
602 assert(j_term != NULL);
603
604 /* Call the addEquivTerm function (Java side) to record the equivalent term */
605 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
606 j_query_term, j_term,
607 (jlong) qd->TL->TE[j].WE.doc_count,
608 (jlong) qd->TL->TE[j].WE.count);
[3743]609 exc = (*j_env)->ExceptionOccurred(j_env);
610 if (exc) {
611 (*j_env)->ExceptionDescribe(j_env);
612 return;
613 }
614 }
615 }
616 }
617 }
618}
619
620
621/*******************************************
622 set query options
623 *******************************************/
624
625/* Turn casefolding on or off */
626JNIEXPORT void JNICALL
627Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
628 jboolean j_on)
629{
[25244]630 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]631
632 if (j_on) {
633 data->defaultStemMethod |= 1;
634 } else {
635 data->defaultStemMethod &= 0xe;
636 }
637}
638
639
640/* Turn stemming on or off */
641JNIEXPORT void JNICALL
642Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
643 jboolean j_on)
644{
[25244]645 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]646
647 if (j_on) {
648 data->defaultStemMethod |= 2;
649 } else {
650 data->defaultStemMethod &= 0xd;
651 }
652}
653
654
655/* Set the maximum number of documents to return from a query */
656JNIEXPORT void JNICALL
657Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
658 jint j_max)
659{
[25244]660 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]661 data->queryInfo->maxDocs = j_max;
662}
663
[13288]664/* set the maximum number of numeric to split*/
665JNIEXPORT void JNICALL
666Java_org_greenstone_mg_MGWrapper_setMaxNumeric (JNIEnv *j_env,
667 jobject j_obj,
668 jint j_max) {
[3743]669
[13288]670 char text[20];
671 char* maxnumeric;
672 sprintf(text,"%d",j_max);
673 maxnumeric = text;
674 SetEnv("maxnumeric",maxnumeric, NULL);
675}
676
677
[3743]678/* Turn term frequency recording on or off */
679JNIEXPORT void JNICALL
680Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
681 jboolean j_on)
682{
[25244]683 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]684 data->queryInfo->needTermFreqs = j_on;
685}
686
687
688/* Choose MG index to search */
689JNIEXPORT void JNICALL
690Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
691 jstring j_index)
692{
[25244]693 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]694
695 /* Get the index name as a C string */
696 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
697 assert(index != NULL);
698 printf("Choosing index %s...\n", index);
699
700 /* Free the previous index name */
701 if (data->queryInfo->index)
702 free(data->queryInfo->index);
703
704 /* Allocate memory for the index name, and fill it */
705 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
706 assert(data->queryInfo->index != NULL);
707 strcpy(data->queryInfo->index, index);
708
709 /* Release the index string */
710 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
711}
712
713
714/* Choose boolean AND or boolean OR queries */
715JNIEXPORT void JNICALL
716Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
717 jint j_mode)
718{
[25244]719 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]720 data->defaultBoolCombine = j_mode;
721}
722
723
724/* Get a text representation of the current parameter values */
725JNIEXPORT jstring JNICALL
726Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
727{
[25244]728 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]729 char result[512]; /* Assume this is big enough */
730
731 /* Print the data to a character array */
[25147]732 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
[3743]733 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
734 (data->defaultStemMethod & 1),
735 (data->defaultStemMethod & 2),
736 (data->defaultBoolCombine == 1 ? "all" : "some"),
737 (data->queryInfo->maxDocs));
738
739 /* Convert to a jstring, and return */
740 return (*j_env)->NewStringUTF(j_env, result);
741}
Note: See TracBrowser for help on using the repository browser.