source: trunk/gsdl3/packages/mg/jni/MGWrapperImpl.c@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:keywords set to Author Date Id Revision
File size: 24.3 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"
#include <jni.h>
#include <stdint.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30/*************************************************************************
31 NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35 - Sorting results by rank.
36 Done here as a post-processing operation. Could be more efficient in
37 some cases: the current solution is not very good when the number of
38 matching documents is large and the number of desired matches is
39 small. In this case it would be better to iterate through the array
40 picking out the best documents rather than sorting them all.
41
42 - Asking for query term frequencies to be returned.
43 This cannot be turned off in MG. If the term frequencies are not
44 required, they are simply not passed back to the Java side.
45
46 - Choosing the index for queries.
47 It is possible for MG to build Section and Document indexes (for
48 example), but these are two separate indexes, and must be loaded
49 separately. This module can load more than one index at a time, thus
50 processing queries to different indexes more quickly.
51 NOTE: This replaces TWO options in the MGPP version: returnLevel and
52 queryLevel.
53
54 *************************************************************************/
55
56
57#define MAX_INDEXES_CACHED 3
58
59/* copied from mgquery, needed to convert paragraph numbers to document numbers
60 for greenstone */
61#if defined(PARADOCNUM) || defined(NZDL)
62static int GetDocNumFromParaNum(query_data *qd, int paranum) {
63 int Documents = qd->td->cth.num_of_docs;
64 int *Paragraph = qd->paragraph;
65 int low = 1, high = Documents;
66 int mid = (low+high)/2;
67
68 while ((mid = (low+high)/2) >=1 && mid <= Documents)
69 {
70 if (paranum > Paragraph[mid])
71 low = mid+1;
72 else if (paranum <= Paragraph[mid-1])
73 high = mid-1;
74 else
75 return mid;
76 }
77 FatalError(1, "Bad paragraph number.\n");
78 return 0;
79}
80#endif
81
82
83/*********************************************
84 initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110 jclass JC_MGQueryResult;
111
112 /* a long-"J" */
113 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114 assert(FID_mg_data != NULL);
115
116 /* an object -"L<class name>;" */
117 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118 "Lorg/greenstone/mg/MGQueryResult;");
119 assert(FID_query_result != NULL);
120
121 /* the methods we want to use */
122 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124 /* addDoc(long doc, float rank) */
125 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126 assert(MID_addDoc != NULL);
127
128 /* addTerm(String term, int stem) */
129 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130 assert(MID_addTerm != NULL);
131
132 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134 assert(MID_addEquivTerm != NULL);
135
136 /* setTotalDocs(long) */
137 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138 assert(MID_setTotalDocs != NULL);
139
140 /* clear(void) */
141 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142 assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147 initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152 /* Allocate a MGWrapperData object to store query parameters */
153 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154 assert(data != NULL);
155
156 /* Set default values - no stemming, no case-folding, boolean OR queries */
157 data->defaultStemMethod = 0;
158 data->defaultBoolCombine = 0;
159
160 /* Allocate a QueryInfo object to store more query parameters */
161 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162 assert(data->queryInfo != NULL);
163
164 /* Set default values - 50 documents max, return term freqs, sort by rank */
165 data->queryInfo->index = NULL;
166 data->queryInfo->maxDocs = 50;
167 data->queryInfo->needTermFreqs = 1;
168
169 /* Save the object on the Java side */
170 (*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172 /* Initialise MG environment variables */
173 InitEnv();
174 SetEnv("expert", "true", NULL);
175 SetEnv("mode", "docnums", NULL);
176 return 1; /* true - no errors */
177}
178
179
180/*******************************************
181 Index caching
182 *******************************************/
183
184query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187/* Get the index data necessary to perform a query or document retrieval */
188query_data*
189loadIndexData(char* base_dir, char* index_path, char* text_path)
190{
191 char* index_path_name;
192 char* text_path_name;
193 query_data* qd;
194 int i = 0;
195
196 /* Form the path name of the desired indexes */
197 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198 assert(index_path_name != NULL);
199 strcpy(index_path_name, base_dir);
200 strcat(index_path_name, index_path);
201 printf("Index pathname: %s\n", index_path_name);
202
203 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204 assert(text_path_name != NULL);
205 strcpy(text_path_name, base_dir);
206 strcat(text_path_name, text_path);
207 printf("Text pathname: %s\n", text_path_name);
208
209 /* Search through the cached indexes for the desired one */
210 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214 /* Check if the index has already been loaded */
215 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217 /* Index has already been loaded and cached, so return it */
218 printf("Found index!\n");
219 free(index_path_name);
220 free(text_path_name);
221 return cached_indexes[i];
222 }
223
224 i++;
225 }
226
227 /* Text strings no longer needed */
228 free(index_path_name);
229 free(text_path_name);
230
231 /* The index is not cached, so load it now */
232 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233 if (!qd) {
234 printf("Error: Could not InitQuerySystem()...\n");
235 return NULL;
236 }
237
238 /* The index loaded OK, so cache it */
239 /* This could be a little more sophisticated, eg. replace least frequently used index */
240 if (i >= MAX_INDEXES_CACHED)
241 i = MAX_INDEXES_CACHED - 1;
242
243 /* Free the index being replaced */
244 if (cached_indexes[i] != NULL)
245 FinishQuerySystem(cached_indexes[i]);
246
247 /* Cache the loaded index, and return it */
248 cached_indexes[i] = qd;
249 return cached_indexes[i];
250}
251
252
253/* Clean up by unloading all cached indexes */
254JNIEXPORT jboolean JNICALL
255Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256{
257 /* Free all the loaded indexes */
258 int i = 0;
259 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260 FinishQuerySystem(cached_indexes[i]);
261 cached_indexes[i] = NULL;
262 i++;
263 }
264
265 return 1; /* true - no errors */
266}
267
268
269/****************************************************
270 retrieve a document
271 ****************************************************/
272
273/* Returns a document from mg as a string */
274JNIEXPORT jstring JNICALL
275Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276 jstring j_base_dir, jstring j_text_path,
277 jlong j_docnum)
278{
279 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281 char* index_path;
282 const char* base_dir;
283 const char* text_path;
284 query_data* qd;
285
286 u_long pos, len;
287 u_char* c_buffer = NULL;
288 u_char* uc_buffer = NULL;
289 int ULen;
290
291 jstring result;
292
293 /* Make sure an index has been specified */
294 index_path = data->queryInfo->index;
295 assert(index_path != NULL);
296
297 /* Obtain C versions of the two string parameters */
298 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299 if (base_dir == NULL) {
300 return NULL;
301 }
302 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303 if (text_path == NULL) {
304 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305 return NULL;
306 }
307
308 /* Load the appropriate index for satisfying this request */
309 printf("Document retrieval, index path: %s\n", index_path);
310 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
311
312 /* The C text strings are no longer needed */
313 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316 /* Check that the index was loaded successfully */
317 if (qd==NULL) {
318 return NULL;
319 }
320 /*assert(qd != NULL);*/
321
322 /* Get the document position and length in the text file */
323 printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
324 FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
325 printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
326
327 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
328 c_buffer = (u_char*) malloc(len);
329 assert(c_buffer != NULL);
330 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
331 assert(uc_buffer != NULL);
332
333 /* Seek to the correct position in the file and read the document text */
334 Fseek (qd->td->TextFile, pos, 0);
335 Fread (c_buffer, 1, len, qd->td->TextFile);
336
337 /* Decompress the document text into another buffer, and terminate it */
338 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
339 uc_buffer[ULen] = '\0';
340
341 /* Load the document text into a Java string */
342 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
343 assert(result != NULL);
344
345 /* Free C buffers */
346 free(c_buffer);
347 free(uc_buffer);
348
349 /* Return the document text */
350 return result;
351}
352
353
354/*******************************************
355 do a query
356 *******************************************/
357
358/* do the actual query - the results are written to query_result held on the Java side */
359JNIEXPORT void JNICALL
360Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
361 jstring j_base_dir, jstring j_text_path,
362 jstring j_query)
363{
364 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
365
366 char* index_path;
367 const char* base_dir;
368 const char* text_path;
369 query_data* qd;
370
371 jobject result_ptr;
372 char* query;
373 int i, j;
374
375 jthrowable exc;
376
377 /* First of all, clear the previous result */
378 /* The result to write to */
379 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
380 assert(result_ptr != NULL);
381
382 /* Clear any previous query results */
383 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
384 exc = (*j_env)->ExceptionOccurred(j_env);
385 if (exc) {
386 (*j_env)->ExceptionDescribe(j_env);
387 return;
388 }
389
390 /* Make sure an index has been specified */
391 index_path = data->queryInfo->index;
392 assert(index_path != NULL);
393
394 /* Obtain C versions of the two string parameters */
395 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
396 if (base_dir == NULL) {
397 return;
398 }
399 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
400 if (text_path == NULL) {
401 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
402 return;
403 }
404
405 /* Load the appropriate index for satisfying this request */
406 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
407
408 /* The C text strings are no longer needed */
409 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
410 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
411
412 /* Check that the index was loaded successfully */
413 if (qd == NULL) {
414 return;
415 }
416 /*assert(qd != NULL);*/
417
418 /* Remove anything hanging around from last time */
419 FreeQueryDocs(qd);
420
421 /* Obtain a C version of the query string */
422 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
423 assert(query != NULL);
424 printf("Searching for query \"%s\"...\n", query);
425
426 /* Make sure the query isn't empty */
427 if (strlen(query) == 0) {
428 printf("Warning: Empty query.\n");
429 return;
430 }
431
432 /* "Some" queries are done as ranked queries */
433 if (data->defaultBoolCombine == 0) {
434 RankedQueryInfo rqi;
435 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
436 rqi.Exact = 1; /* Perform exact ranking */
437 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
438 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
439 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
440 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
441 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
442 }
443
444 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
445 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
446 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
447 rqi.MaxAccums = 100000;
448 rqi.MaxTerms = -1; /* Use all the query terms */
449 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
450 rqi.StopAtMaxAccum = 1;
451 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
452 rqi.skip_dump = NULL; /* Don't dump skip information */
453
454 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
455 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
456 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
457
458 RankedQuery(qd, query, &rqi);
459 }
460 /* "All" queries are done as boolean queries */
461 else {
462 BooleanQueryInfo bqi;
463 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
464
465 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
466 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
467 }
468
469 /* Finished with the C query string */
470 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
471
472 /* Check the query was processed successfully */
473 assert(qd->DL != NULL);
474 assert(qd->QTL != NULL);
475 assert(qd->TL != NULL);
476
477 /* Record the total number of matching documents */
478 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
479 exc = (*j_env)->ExceptionOccurred(j_env);
480 if (exc) {
481 (*j_env)->ExceptionDescribe(j_env);
482 return;
483 }
484
485 /* Record the matching documents, but only the number requested */
486 printf("Number of matching documents: %d\n", qd->DL->num);
487
488 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
489 int doc_num = qd->DL->DE[i].DocNum;
490 float doc_weight = qd->DL->DE[i].Weight;
491
492#if defined(PARADOCNUM) || defined(NZDL)
493 if (qd->id->ifh.InvfLevel == 3) {
494 /* pararaph level, need to convert to doc level*/
495 doc_num = GetDocNumFromParaNum(qd, doc_num);
496 }
497#endif
498
499
500 /* Call the addDoc function (Java side) to record a matching document */
501 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
502 (jlong) doc_num, (jfloat) doc_weight);
503 exc = (*j_env)->ExceptionOccurred(j_env);
504 if (exc) {
505 (*j_env)->ExceptionDescribe(j_env);
506 return;
507 }
508 }
509
510 /* Record the term information, if desired */
511 if (data->queryInfo->needTermFreqs) {
512 /* The following code is a lot more complicated than it could be, but it is necessary
513 to compensate for an oddity in MG. */
514 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
515
516 printf("Number of terms: %d\n", qd->TL->num);
517 printf("Number of query terms: %d\n", qd->QTL->num);
518
519 /* Generate the stemmed form of each of the relevant terms */
520 for (i = 0; i < qd->TL->num; i++) {
521 u_char* raw_term = qd->TL->TE[i].Word;
522 unsigned int term_length = raw_term[0];
523
524 u_char* raw_stemmed_term = malloc(term_length + 1);
525 unsigned int stemmed_term_length;
526
527 /* Copy the term, and stem it */
528 for (j = 0; j <= term_length; j++)
529 raw_stemmed_term[j] = raw_term[j];
530 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
531
532 /* Allocate memory to store the stemmed term, and fill it */
533 stemmed_term_length = raw_stemmed_term[0];
534 stemmed_terms[i] = malloc(stemmed_term_length + 1);
535 assert(stemmed_terms[i] != NULL);
536 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
537 stemmed_terms[i][stemmed_term_length] = '\0';
538 }
539
540 /* Record every query term, along with their equivalent terms */
541 for (i = 0; i < qd->QTL->num; i++) {
542 u_char* raw_query_term = qd->QTL->QTE[i].Term;
543 unsigned int query_term_length = raw_query_term[0];
544 unsigned char* query_term;
545 jstring j_query_term;
546
547 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
548 unsigned int stemmed_query_term_length;
549 unsigned char* stemmed_query_term;
550
551 /* Allocate memory to store the query term, and fill it */
552 query_term = malloc(query_term_length + 1);
553 assert(query_term != NULL);
554 strncpy(query_term, &(raw_query_term[1]), query_term_length);
555 query_term[query_term_length] = '\0';
556
557 /* Allocate a new jstring for the query term */
558 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
559 assert(j_query_term != NULL);
560
561 /* Call the addTerm function (Java side) to record the query term */
562 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
563 j_query_term, (jint) data->defaultStemMethod);
564 exc = (*j_env)->ExceptionOccurred(j_env);
565 if (exc) {
566 (*j_env)->ExceptionDescribe(j_env);
567 return;
568 }
569
570 /* Copy the query term, and stem it */
571 for (j = 0; j <= query_term_length; j++)
572 raw_stemmed_query_term[j] = raw_query_term[j];
573 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
574
575 /* Allocate memory to store the stemmed query term, and fill it */
576 stemmed_query_term_length = raw_stemmed_query_term[0];
577 stemmed_query_term = malloc(stemmed_query_term_length + 1);
578 assert(stemmed_query_term != NULL);
579 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
580 stemmed_query_term[stemmed_query_term_length] = '\0';
581
582 /* Find all the terms equivalent to the query term */
583 for (j = 0; j < qd->TL->num; j++) {
584 /* Check if the stemmed query term matches the stemmed term */
585 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
586 u_char* raw_term = qd->TL->TE[j].Word;
587 unsigned int term_length = raw_term[0];
588 unsigned char* term;
589 jstring j_term;
590
591 /* Allocate memory to store the query term, and fill it */
592 term = malloc(term_length + 1);
593 assert(term != NULL);
594 strncpy(term, &(raw_term[1]), term_length);
595 term[term_length] = '\0';
596
597 /* Allocate a new jstring for the query term */
598 j_term = (*j_env)->NewStringUTF(j_env, term);
599 assert(j_term != NULL);
600
601 /* Call the addEquivTerm function (Java side) to record the equivalent term */
602 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
603 j_query_term, j_term,
604 (jlong) qd->TL->TE[j].WE.doc_count,
605 (jlong) qd->TL->TE[j].WE.count);
606 exc = (*j_env)->ExceptionOccurred(j_env);
607 if (exc) {
608 (*j_env)->ExceptionDescribe(j_env);
609 return;
610 }
611 }
612 }
613 }
614 }
615}
616
617
618/*******************************************
619 set query options
620 *******************************************/
621
622/* Turn casefolding on or off */
623JNIEXPORT void JNICALL
624Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
625 jboolean j_on)
626{
627 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
628
629 if (j_on) {
630 data->defaultStemMethod |= 1;
631 } else {
632 data->defaultStemMethod &= 0xe;
633 }
634}
635
636
637/* Turn stemming on or off */
638JNIEXPORT void JNICALL
639Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
640 jboolean j_on)
641{
642 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
643
644 if (j_on) {
645 data->defaultStemMethod |= 2;
646 } else {
647 data->defaultStemMethod &= 0xd;
648 }
649}
650
651
652/* Set the maximum number of documents to return from a query */
653JNIEXPORT void JNICALL
654Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
655 jint j_max)
656{
657 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
658 data->queryInfo->maxDocs = j_max;
659}
660
661
662/* Turn term frequency recording on or off */
663JNIEXPORT void JNICALL
664Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
665 jboolean j_on)
666{
667 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
668 data->queryInfo->needTermFreqs = j_on;
669}
670
671
672/* Choose MG index to search */
673JNIEXPORT void JNICALL
674Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
675 jstring j_index)
676{
677 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
678
679 /* Get the index name as a C string */
680 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
681 assert(index != NULL);
682 printf("Choosing index %s...\n", index);
683
684 /* Free the previous index name */
685 if (data->queryInfo->index)
686 free(data->queryInfo->index);
687
688 /* Allocate memory for the index name, and fill it */
689 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
690 assert(data->queryInfo->index != NULL);
691 strcpy(data->queryInfo->index, index);
692
693 /* Release the index string */
694 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
695}
696
697
698/* Choose boolean AND or boolean OR queries */
699JNIEXPORT void JNICALL
700Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
701 jint j_mode)
702{
703 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
704 data->defaultBoolCombine = j_mode;
705}
706
707
708/* Get a text representation of the current parameter values */
709JNIEXPORT jstring JNICALL
710Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
711{
712 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
713 char result[512]; /* Assume this is big enough */
714
715 /* Print the data to a character array */
716 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
717 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
718 (data->defaultStemMethod & 1),
719 (data->defaultStemMethod & 2),
720 (data->defaultBoolCombine == 1 ? "all" : "some"),
721 (data->queryInfo->maxDocs));
722
723 /* Convert to a jstring, and return */
724 return (*j_env)->NewStringUTF(j_env, result);
725}
Note: See TracBrowser for help on using the repository browser.