source: trunk/indexers/mg/jni/MGWrapperImpl.c@ 11021

Last change on this file since 11021 was 11021, checked in by kjdon, 18 years ago

asserts if they fail kill tomcat, so changed some asserts to if statements

  • Property svn:keywords set to Author Date Id Revision
File size: 24.3 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
21#include "MGWrapperImpl.h"
22#include <jni.h>
23#include "org_greenstone_mg_MGWrapper.h"
24
25#include "backend.h"
26#include "environment.h"
27#include "text_get.h"
28#include "stemmer.h"
29
30/*************************************************************************
31 NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35 - Sorting results by rank.
36 Done here as a post-processing operation. Could be more efficient in
37 some cases: the current solution is not very good when the number of
38 matching documents is large and the number of desired matches is
39 small. In this case it would be better to iterate through the array
40 picking out the best documents rather than sorting them all.
41
42 - Asking for query term frequencies to be returned.
43 This cannot be turned off in MG. If the term frequencies are not
44 required, they are simply not passed back to the Java side.
45
46 - Choosing the index for queries.
47 It is possible for MG to build Section and Document indexes (for
48 example), but these are two separate indexes, and must be loaded
49 separately. This module can load more than one index at a time, thus
50 processing queries to different indexes more quickly.
51 NOTE: This replaces TWO options in the MGPP version: returnLevel and
52 queryLevel.
53
54 *************************************************************************/
55
56
/* Maximum number of loaded indexes kept in the cache at any one time */
#define MAX_INDEXES_CACHED 3
58
59/* copied from mgquery, needed to convert paragraph numbers to document numbers
60 for greenstone */
61#if defined(PARADOCNUM) || defined(NZDL)
62static int GetDocNumFromParaNum(query_data *qd, int paranum) {
63 int Documents = qd->td->cth.num_of_docs;
64 int *Paragraph = qd->paragraph;
65 int low = 1, high = Documents;
66 int mid = (low+high)/2;
67
68 while ((mid = (low+high)/2) >=1 && mid <= Documents)
69 {
70 if (paranum > Paragraph[mid])
71 low = mid+1;
72 else if (paranum <= Paragraph[mid-1])
73 high = mid-1;
74 else
75 return mid;
76 }
77 FatalError(1, "Bad paragraph number.\n");
78 return 0;
79}
80#endif
81
82
83/*********************************************
84 initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110 jclass JC_MGQueryResult;
111
112 /* a long-"J" */
113 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114 assert(FID_mg_data != NULL);
115
116 /* an object -"L<class name>;" */
117 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118 "Lorg/greenstone/mg/MGQueryResult;");
119 assert(FID_query_result != NULL);
120
121 /* the methods we want to use */
122 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124 /* addDoc(long doc, float rank) */
125 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126 assert(MID_addDoc != NULL);
127
128 /* addTerm(String term, int stem) */
129 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130 assert(MID_addTerm != NULL);
131
132 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134 assert(MID_addEquivTerm != NULL);
135
136 /* setTotalDocs(long) */
137 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138 assert(MID_setTotalDocs != NULL);
139
140 /* clear(void) */
141 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142 assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147 initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152 /* Allocate a MGWrapperData object to store query parameters */
153 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154 assert(data != NULL);
155
156 /* Set default values - no stemming, no case-folding, boolean OR queries */
157 data->defaultStemMethod = 0;
158 data->defaultBoolCombine = 0;
159
160 /* Allocate a QueryInfo object to store more query parameters */
161 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162 assert(data->queryInfo != NULL);
163
164 /* Set default values - 50 documents max, return term freqs, sort by rank */
165 data->queryInfo->index = NULL;
166 data->queryInfo->maxDocs = 50;
167 data->queryInfo->needTermFreqs = 1;
168
169 /* Save the object on the Java side */
170 (*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172 /* Initialise MG environment variables */
173 InitEnv();
174 SetEnv("expert", "true", NULL);
175 SetEnv("mode", "docnums", NULL);
176 return 1; /* true - no errors */
177}
178
179
180/*******************************************
181 Index caching
182 *******************************************/
183
184query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187/* Get the index data necessary to perform a query or document retrieval */
188query_data*
189loadIndexData(char* base_dir, char* index_path, char* text_path)
190{
191 char* index_path_name;
192 char* text_path_name;
193 query_data* qd;
194 int i = 0;
195
196 /* Form the path name of the desired indexes */
197 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198 assert(index_path_name != NULL);
199 strcpy(index_path_name, base_dir);
200 strcat(index_path_name, index_path);
201 printf("Index pathname: %s\n", index_path_name);
202
203 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204 assert(text_path_name != NULL);
205 strcpy(text_path_name, base_dir);
206 strcat(text_path_name, text_path);
207 printf("Text pathname: %s\n", text_path_name);
208
209 /* Search through the cached indexes for the desired one */
210 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214 /* Check if the index has already been loaded */
215 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217 /* Index has already been loaded and cached, so return it */
218 printf("Found index!\n");
219 free(index_path_name);
220 free(text_path_name);
221 return cached_indexes[i];
222 }
223
224 i++;
225 }
226
227 /* Text strings no longer needed */
228 free(index_path_name);
229 free(text_path_name);
230
231 /* The index is not cached, so load it now */
232 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233 if (!qd) {
234 printf("Error: Could not InitQuerySystem()...\n");
235 return NULL;
236 }
237
238 /* The index loaded OK, so cache it */
239 /* This could be a little more sophisticated, eg. replace least frequently used index */
240 if (i >= MAX_INDEXES_CACHED)
241 i = MAX_INDEXES_CACHED - 1;
242
243 /* Free the index being replaced */
244 if (cached_indexes[i] != NULL)
245 FinishQuerySystem(cached_indexes[i]);
246
247 /* Cache the loaded index, and return it */
248 cached_indexes[i] = qd;
249 return cached_indexes[i];
250}
251
252
253/* Clean up by unloading all cached indexes */
254JNIEXPORT jboolean JNICALL
255Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256{
257 /* Free all the loaded indexes */
258 int i = 0;
259 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260 FinishQuerySystem(cached_indexes[i]);
261 cached_indexes[i] = NULL;
262 i++;
263 }
264
265 return 1; /* true - no errors */
266}
267
268
269/****************************************************
270 retrieve a document
271 ****************************************************/
272
273/* Returns a document from mg as a string */
274JNIEXPORT jstring JNICALL
275Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276 jstring j_base_dir, jstring j_text_path,
277 jlong j_docnum)
278{
279 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281 char* index_path;
282 const char* base_dir;
283 const char* text_path;
284 query_data* qd;
285
286 u_long pos, len;
287 u_char* c_buffer = NULL;
288 u_char* uc_buffer = NULL;
289 int ULen;
290
291 jstring result;
292
293 /* Make sure an index has been specified */
294 index_path = data->queryInfo->index;
295 assert(index_path != NULL);
296
297 /* Obtain C versions of the two string parameters */
298 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299 if (base_dir == NULL) {
300 return NULL;
301 }
302 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303 if (text_path == NULL) {
304 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305 return NULL;
306 }
307
308 /* Load the appropriate index for satisfying this request */
309 printf("Document retrieval, index path: %s\n", index_path);
310 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
311
312 /* The C text strings are no longer needed */
313 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316 /* Check that the index was loaded successfully */
317 if (qd==NULL) {
318 return NULL;
319 }
320 /*assert(qd != NULL);*/
321
322 /* Get the document position and length in the text file */
323 printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
324 FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
325 printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
326
327 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
328 c_buffer = (u_char*) malloc(len);
329 assert(c_buffer != NULL);
330 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
331 assert(uc_buffer != NULL);
332
333 /* Seek to the correct position in the file and read the document text */
334 Fseek (qd->td->TextFile, pos, 0);
335 Fread (c_buffer, 1, len, qd->td->TextFile);
336
337 /* Decompress the document text into another buffer, and terminate it */
338 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
339 uc_buffer[ULen] = '\0';
340
341 /* Load the document text into a Java string */
342 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
343 assert(result != NULL);
344
345 /* Free C buffers */
346 free(c_buffer);
347 free(uc_buffer);
348
349 /* Return the document text */
350 return result;
351}
352
353
354/*******************************************
355 do a query
356 *******************************************/
357
358/* do the actual query - the results are written to query_result held on the Java side */
359JNIEXPORT void JNICALL
360Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
361 jstring j_base_dir, jstring j_text_path,
362 jstring j_query)
363{
364 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
365
366 char* index_path;
367 const char* base_dir;
368 const char* text_path;
369 query_data* qd;
370
371 jobject result_ptr;
372 char* query;
373 int i, j;
374
375 jthrowable exc;
376 /* First of all, clear the previous result */
377 /* The result to write to */
378 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
379 assert(result_ptr != NULL);
380
381 /* Clear any previous query results */
382 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
383 exc = (*j_env)->ExceptionOccurred(j_env);
384 if (exc) {
385 (*j_env)->ExceptionDescribe(j_env);
386 return;
387 }
388
389 /* Make sure an index has been specified */
390 index_path = data->queryInfo->index;
391 if (index_path == NULL) {
392 return;
393 }
394
395 /* Obtain C versions of the two string parameters */
396 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
397 if (base_dir == NULL) {
398 return;
399 }
400 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
401 if (text_path == NULL) {
402 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
403 return;
404 }
405
406 /* Load the appropriate index for satisfying this request */
407 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
408
409 /* The C text strings are no longer needed */
410 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
411 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
412
413 /* Check that the index was loaded successfully */
414 if (qd == NULL) {
415 return;
416 }
417
418 /* Remove anything hanging around from last time */
419 FreeQueryDocs(qd);
420
421 /* Obtain a C version of the query string */
422 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
423 if (query == NULL) {
424 return;
425 }
426 printf("Searching for query \"%s\"...\n", query);
427
428 /* Make sure the query isn't empty */
429 if (strlen(query) == 0) {
430 printf("Warning: Empty query.\n");
431 return;
432 }
433
434 /* "Some" queries are done as ranked queries */
435 if (data->defaultBoolCombine == 0) {
436 RankedQueryInfo rqi;
437 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
438 rqi.Exact = 1; /* Perform exact ranking */
439 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
440 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
441 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
442 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
443 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
444 }
445
446 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
447 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
448 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
449 rqi.MaxAccums = 100000;
450 rqi.MaxTerms = -1; /* Use all the query terms */
451 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
452 rqi.StopAtMaxAccum = 1;
453 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
454 rqi.skip_dump = NULL; /* Don't dump skip information */
455
456 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
457 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
458 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
459
460 RankedQuery(qd, query, &rqi);
461 }
462 /* "All" queries are done as boolean queries */
463 else {
464 BooleanQueryInfo bqi;
465 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
466
467 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
468 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
469 }
470
471 /* Finished with the C query string */
472 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
473
474 /* Check the query was processed successfully */
475 if (qd->DL == NULL || qd->QTL == NULL || qd->TL == NULL) {
476 return;
477 }
478
479 /* Record the total number of matching documents */
480 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
481 exc = (*j_env)->ExceptionOccurred(j_env);
482 if (exc) {
483 (*j_env)->ExceptionDescribe(j_env);
484 return;
485 }
486
487 /* Record the matching documents, but only the number requested */
488 printf("Number of matching documents: %d\n", qd->DL->num);
489
490 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
491 int doc_num = qd->DL->DE[i].DocNum;
492 float doc_weight = qd->DL->DE[i].Weight;
493
494#if defined(PARADOCNUM) || defined(NZDL)
495 if (qd->id->ifh.InvfLevel == 3) {
496 /* pararaph level, need to convert to doc level*/
497 doc_num = GetDocNumFromParaNum(qd, doc_num);
498 }
499#endif
500
501
502 /* Call the addDoc function (Java side) to record a matching document */
503 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
504 (jlong) doc_num, (jfloat) doc_weight);
505 exc = (*j_env)->ExceptionOccurred(j_env);
506 if (exc) {
507 (*j_env)->ExceptionDescribe(j_env);
508 return;
509 }
510 }
511
512 /* Record the term information, if desired */
513 if (data->queryInfo->needTermFreqs) {
514 /* The following code is a lot more complicated than it could be, but it is necessary
515 to compensate for an oddity in MG. */
516 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
517
518 printf("Number of terms: %d\n", qd->TL->num);
519 printf("Number of query terms: %d\n", qd->QTL->num);
520
521 /* Generate the stemmed form of each of the relevant terms */
522 for (i = 0; i < qd->TL->num; i++) {
523 u_char* raw_term = qd->TL->TE[i].Word;
524 unsigned int term_length = raw_term[0];
525
526 u_char* raw_stemmed_term = malloc(term_length + 1);
527 unsigned int stemmed_term_length;
528
529 /* Copy the term, and stem it */
530 for (j = 0; j <= term_length; j++)
531 raw_stemmed_term[j] = raw_term[j];
532 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
533
534 /* Allocate memory to store the stemmed term, and fill it */
535 stemmed_term_length = raw_stemmed_term[0];
536 stemmed_terms[i] = malloc(stemmed_term_length + 1);
537 assert(stemmed_terms[i] != NULL);
538 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
539 stemmed_terms[i][stemmed_term_length] = '\0';
540 }
541
542 /* Record every query term, along with their equivalent terms */
543 for (i = 0; i < qd->QTL->num; i++) {
544 u_char* raw_query_term = qd->QTL->QTE[i].Term;
545 unsigned int query_term_length = raw_query_term[0];
546 unsigned char* query_term;
547 jstring j_query_term;
548
549 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
550 unsigned int stemmed_query_term_length;
551 unsigned char* stemmed_query_term;
552
553 /* Allocate memory to store the query term, and fill it */
554 query_term = malloc(query_term_length + 1);
555 assert(query_term != NULL);
556 strncpy(query_term, &(raw_query_term[1]), query_term_length);
557 query_term[query_term_length] = '\0';
558
559 /* Allocate a new jstring for the query term */
560 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
561 assert(j_query_term != NULL);
562
563 /* Call the addTerm function (Java side) to record the query term */
564 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
565 j_query_term, (jint) data->defaultStemMethod);
566 exc = (*j_env)->ExceptionOccurred(j_env);
567 if (exc) {
568 (*j_env)->ExceptionDescribe(j_env);
569 return;
570 }
571
572 /* Copy the query term, and stem it */
573 for (j = 0; j <= query_term_length; j++)
574 raw_stemmed_query_term[j] = raw_query_term[j];
575 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
576
577 /* Allocate memory to store the stemmed query term, and fill it */
578 stemmed_query_term_length = raw_stemmed_query_term[0];
579 stemmed_query_term = malloc(stemmed_query_term_length + 1);
580 assert(stemmed_query_term != NULL);
581 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
582 stemmed_query_term[stemmed_query_term_length] = '\0';
583
584 /* Find all the terms equivalent to the query term */
585 for (j = 0; j < qd->TL->num; j++) {
586 /* Check if the stemmed query term matches the stemmed term */
587 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
588 u_char* raw_term = qd->TL->TE[j].Word;
589 unsigned int term_length = raw_term[0];
590 unsigned char* term;
591 jstring j_term;
592
593 /* Allocate memory to store the query term, and fill it */
594 term = malloc(term_length + 1);
595 assert(term != NULL);
596 strncpy(term, &(raw_term[1]), term_length);
597 term[term_length] = '\0';
598
599 /* Allocate a new jstring for the query term */
600 j_term = (*j_env)->NewStringUTF(j_env, term);
601 assert(j_term != NULL);
602
603 /* Call the addEquivTerm function (Java side) to record the equivalent term */
604 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
605 j_query_term, j_term,
606 (jlong) qd->TL->TE[j].WE.doc_count,
607 (jlong) qd->TL->TE[j].WE.count);
608 exc = (*j_env)->ExceptionOccurred(j_env);
609 if (exc) {
610 (*j_env)->ExceptionDescribe(j_env);
611 return;
612 }
613 }
614 }
615 }
616 }
617}
618
619
620/*******************************************
621 set query options
622 *******************************************/
623
624/* Turn casefolding on or off */
625JNIEXPORT void JNICALL
626Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
627 jboolean j_on)
628{
629 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
630
631 if (j_on) {
632 data->defaultStemMethod |= 1;
633 } else {
634 data->defaultStemMethod &= 0xe;
635 }
636}
637
638
639/* Turn stemming on or off */
640JNIEXPORT void JNICALL
641Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
642 jboolean j_on)
643{
644 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
645
646 if (j_on) {
647 data->defaultStemMethod |= 2;
648 } else {
649 data->defaultStemMethod &= 0xd;
650 }
651}
652
653
654/* Set the maximum number of documents to return from a query */
655JNIEXPORT void JNICALL
656Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
657 jint j_max)
658{
659 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
660 data->queryInfo->maxDocs = j_max;
661}
662
663
664/* Turn term frequency recording on or off */
665JNIEXPORT void JNICALL
666Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
667 jboolean j_on)
668{
669 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
670 data->queryInfo->needTermFreqs = j_on;
671}
672
673
674/* Choose MG index to search */
675JNIEXPORT void JNICALL
676Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
677 jstring j_index)
678{
679 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
680
681 /* Get the index name as a C string */
682 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
683 assert(index != NULL);
684 printf("Choosing index %s...\n", index);
685
686 /* Free the previous index name */
687 if (data->queryInfo->index)
688 free(data->queryInfo->index);
689
690 /* Allocate memory for the index name, and fill it */
691 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
692 assert(data->queryInfo->index != NULL);
693 strcpy(data->queryInfo->index, index);
694
695 /* Release the index string */
696 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
697}
698
699
700/* Choose boolean AND or boolean OR queries */
701JNIEXPORT void JNICALL
702Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
703 jint j_mode)
704{
705 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
706 data->defaultBoolCombine = j_mode;
707}
708
709
710/* Get a text representation of the current parameter values */
711JNIEXPORT jstring JNICALL
712Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
713{
714 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
715 char result[512]; /* Assume this is big enough */
716
717 /* Print the data to a character array */
718 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
719 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
720 (data->defaultStemMethod & 1),
721 (data->defaultStemMethod & 2),
722 (data->defaultBoolCombine == 1 ? "all" : "some"),
723 (data->queryInfo->maxDocs));
724
725 /* Convert to a jstring, and return */
726 return (*j_env)->NewStringUTF(j_env, result);
727}
Note: See TracBrowser for help on using the repository browser.