source: trunk/gsdl3/packages/mg/jni/MGWrapperImpl.c@ 3791

Last change on this file since 3791 was 3791, checked in by mdewsnip, 21 years ago

"Some" queries are now performed as ranked queries rather than boolean OR queries. Equivalent terms for each of the query terms are also recorded.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <jni.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
27
28
29/*************************************************************************
30 NOTES
31
32 - Features supported by MGPP but not by MG (AFAIK)
33
34 - Sorting results by rank.
35 Done here as a post-processing operation. Could be more efficient in
36 some cases: the current solution is not very good when the number of
37 matching documents is large and the number of desired matches is
38 small. In this case it would be better to iterate through the array
39 picking out the best documents rather than sorting them all.
40
41 - Asking for query term frequencies to be returned.
42 This cannot be turned off in MG. If the term frequencies are not
43 required, they are simply not passed back to the Java side.
44
45 - Choosing the index for queries.
46 It is possible for MG to build Section and Document indexes (for
47 example), but these are two separate indexes, and must be loaded
48 separately. This module can load more than one index at a time, thus
49 processing queries to different indexes more quickly.
50 NOTE: This replaces TWO options in the MGPP version: returnLevel and
51 queryLevel.
52
53 - "Some" (boolean OR) queries.
54 In boolean querying mode, MG assumes query terms are joined by ANDs.
55 There is no way to tell MG to treat the query terms as if they were
56 joined by ORs. Here, "some" queries are handled by explicitly
57 inserting OR ("|") characters between the query terms.
58
59 *************************************************************************
60 TO DO
61
62 - Find out about necessity of other query term information
63 (eg. equivalent terms).
64
65 *************************************************************************/
66
67
/* Number of slots in the loaded-index cache */
enum { MAX_INDEXES_CACHED = 2 };
69
70
71/*********************************************
72 initialisation stuff
73 *********************************************/
74
75/* cached ids for java stuff */
76jfieldID FID_mg_data = NULL; /* MGWrapperData */
77jfieldID FID_query_result = NULL; /* MGQueryResult */
78jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
79jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
80jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
81jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
82jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
83
84
85/* to access objects and methods on java side, need their field/method ids -
86 this initialises them at the start to avoid recalculating them each time they
87 are needed
88Note: the descriptors need to be exactly right, otherwise you get an error
89saying "no such field" but no reference to the fact that it has the right
90name but the wrong type.
91Note: apparently the jclass is a local ref and should only work
92in the method that created it. It seems to work ok, but I'll make it
93global cos the book said I should, and it may avoid future hassles.
94*/
95JNIEXPORT void JNICALL
96Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
97{
98 jclass JC_MGQueryResult;
99
100 /* a long-"J" */
101 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
102 assert(FID_mg_data != NULL);
103
104 /* an object -"L<class name>;" */
105 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
106 "Lorg/greenstone/mg/MGQueryResult;");
107 assert(FID_query_result != NULL);
108
109 /* the methods we want to use */
110 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
111
112 /* addDoc(long doc, float rank) */
113 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
114 assert(MID_addDoc != NULL);
115
116 /* addTerm(String term, int stem) */
117 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
118 assert(MID_addTerm != NULL);
119
120 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
121 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
122 assert(MID_addEquivTerm != NULL);
123
124 /* setTotalDocs(long) */
125 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
126 assert(MID_setTotalDocs != NULL);
127
128 /* clear(void) */
129 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
130 assert(MID_clearResult != NULL);
131}
132
133
134/* the java side MGWrapper has a pointer to a C object - MGWrapperData
135 initialise this and set the pointer
136*/
137JNIEXPORT jboolean JNICALL
138Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
139{
140 /* Allocate a MGWrapperData object to store query parameters */
141 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
142 assert(data != NULL);
143
144 /* Set default values - no stemming, no case-folding, boolean OR queries */
145 data->defaultStemMethod = 0;
146 data->defaultBoolCombine = 0;
147
148 /* Allocate a QueryInfo object to store more query parameters */
149 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
150 assert(data->queryInfo != NULL);
151
152 /* Set default values - 50 documents max, return term freqs, sort by rank */
153 data->queryInfo->index = NULL;
154 data->queryInfo->maxDocs = 50;
155 data->queryInfo->needTermFreqs = 1;
156
157 /* Save the object on the Java side */
158 (*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
159
160 /* Initialise MG environment variables */
161 InitEnv();
162 SetEnv("expert", "true", NULL);
163 SetEnv("mode", "docnums", NULL);
164 return 1; /* true - no errors */
165}
166
167
168/*******************************************
169 Index caching
170 *******************************************/
171
172query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
173
174
175/* Get the index data necessary to perform a query or document retrieval */
176query_data*
177loadIndexData(char* base_dir, char* index_path, char* text_path)
178{
179 char* index_path_name;
180 char* text_path_name;
181 query_data* qd;
182 int i = 0;
183
184 /* Form the path name of the desired indexes */
185 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
186 assert(index_path_name != NULL);
187 strcpy(index_path_name, base_dir);
188 strcat(index_path_name, index_path);
189 printf("Index pathname: %s\n", index_path_name);
190
191 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
192 assert(text_path_name != NULL);
193 strcpy(text_path_name, base_dir);
194 strcat(text_path_name, text_path);
195 printf("Text pathname: %s\n", text_path_name);
196
197 /* Search through the cached indexes for the desired one */
198 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
199 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
200 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
201
202 /* Check if the index has already been loaded */
203 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
204 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
205 /* Index has already been loaded and cached, so return it */
206 printf("Found index!\n");
207 free(index_path_name);
208 free(text_path_name);
209 return cached_indexes[i];
210 }
211
212 i++;
213 }
214
215 /* Text strings no longer needed */
216 free(index_path_name);
217 free(text_path_name);
218
219 /* The index is not cached, so load it now */
220 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
221 if (!qd) {
222 printf("Error: Could not InitQuerySystem()...\n");
223 return NULL;
224 }
225
226 /* The index loaded OK, so cache it */
227 /* This could be a little more sophisticated, eg. replace least frequently used index */
228 if (i >= MAX_INDEXES_CACHED)
229 i = MAX_INDEXES_CACHED - 1;
230
231 /* Free the index being replaced */
232 if (cached_indexes[i] != NULL)
233 FinishQuerySystem(cached_indexes[i]);
234
235 /* Cache the loaded index, and return it */
236 cached_indexes[i] = qd;
237 return cached_indexes[i];
238}
239
240
241/* Clean up by unloading all cached indexes */
242JNIEXPORT jboolean JNICALL
243Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
244{
245 /* Free all the loaded indexes */
246 int i = 0;
247 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
248 FinishQuerySystem(cached_indexes[i]);
249 cached_indexes[i] = NULL;
250 i++;
251 }
252
253 return 1; /* true - no errors */
254}
255
256
257/****************************************************
258 retrieve a document
259 ****************************************************/
260
261/* Returns a document from mg as a string */
262JNIEXPORT jstring JNICALL
263Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
264 jstring j_base_dir, jstring j_text_path,
265 jlong j_docnum)
266{
267 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
268
269 char* index_path;
270 const char* base_dir;
271 const char* text_path;
272 query_data* qd;
273
274 u_long pos, len;
275 u_char* c_buffer = NULL;
276 u_char* uc_buffer = NULL;
277 int ULen;
278
279 jstring result;
280
281 /* Make sure an index has been specified */
282 index_path = data->queryInfo->index;
283 assert(index_path != NULL);
284
285 /* Obtain C versions of the two string parameters */
286 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
287 if (base_dir == NULL) {
288 return NULL;
289 }
290 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
291 if (text_path == NULL) {
292 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
293 return NULL;
294 }
295
296 /* Load the appropriate index for satisfying this request */
297 printf("Document retrieval, index path: %s\n", index_path);
298 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
299
300 /* The C text strings are no longer needed */
301 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
302 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
303
304 /* Check that the index was loaded successfully */
305 assert(qd != NULL);
306
307 /* Get the document position and length in the text file */
308 printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
309 FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
310 printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
311
312 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
313 c_buffer = (u_char*) malloc(len);
314 assert(c_buffer != NULL);
315 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
316 assert(uc_buffer != NULL);
317
318 /* Seek to the correct position in the file and read the document text */
319 Fseek (qd->td->TextFile, pos, 0);
320 Fread (c_buffer, 1, len, qd->td->TextFile);
321
322 /* Decompress the document text into another buffer, and terminate it */
323 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
324 uc_buffer[ULen] = '\0';
325
326 /* Load the document text into a Java string */
327 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
328 assert(result != NULL);
329
330 /* Free C buffers */
331 free(c_buffer);
332 free(uc_buffer);
333
334 /* Return the document text */
335 return result;
336}
337
338
339/*******************************************
340 do a query
341 *******************************************/
342
343/* do the actual query - the results are written to query_result held on the Java side */
344JNIEXPORT void JNICALL
345Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
346 jstring j_base_dir, jstring j_text_path,
347 jstring j_query)
348{
349 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
350
351 char* index_path;
352 const char* base_dir;
353 const char* text_path;
354 query_data* qd;
355
356 jobject result_ptr;
357 char* query;
358 int i, j;
359
360 jthrowable exc;
361
362 /* Make sure an index has been specified */
363 index_path = data->queryInfo->index;
364 assert(index_path != NULL);
365
366 /* Obtain C versions of the two string parameters */
367 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
368 if (base_dir == NULL) {
369 return;
370 }
371 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
372 if (text_path == NULL) {
373 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
374 return;
375 }
376
377 /* Load the appropriate index for satisfying this request */
378 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
379
380 /* The C text strings are no longer needed */
381 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
382 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
383
384 /* Check that the index was loaded successfully */
385 assert(qd != NULL);
386
387 /* Remove anything hanging around from last time */
388 FreeQueryDocs(qd);
389
390 /* The result to write to */
391 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
392 assert(result_ptr != NULL);
393
394 /* Clear any previous query results */
395 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
396 exc = (*j_env)->ExceptionOccurred(j_env);
397 if (exc) {
398 (*j_env)->ExceptionDescribe(j_env);
399 return;
400 }
401
402 /* Obtain a C version of the query string */
403 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
404 assert(query != NULL);
405 printf("Searching for query \"%s\"...\n", query);
406
407 /* Make sure the query isn't empty */
408 if (strlen(query) == 0) {
409 printf("Warning: Empty query.\n");
410 return;
411 }
412
413 /* "Some" queries are done as ranked queries */
414 if (data->defaultBoolCombine == 0) {
415 RankedQueryInfo rqi;
416 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
417 rqi.Exact = 1; /* Perform exact ranking */
418 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
419 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
420 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
421 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
422 /* rqi.MaxAccums = -1; /* Use as many accumulators as necessary - CRASHES with list */
423 rqi.MaxAccums = 100000;
424 rqi.MaxTerms = -1; /* Use all the query terms */
425 /* rqi.StopAtMaxAccum = 0; /* Don't care (using as many accumulators as necessary) */
426 rqi.StopAtMaxAccum = 1;
427 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
428 rqi.skip_dump = NULL; /* Don't dump skip information */
429
430 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
431 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
432 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
433
434 RankedQuery(qd, query, &rqi);
435 }
436 /* "All" queries are done as boolean queries */
437 else {
438 BooleanQueryInfo bqi;
439 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
440
441 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
442 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
443 }
444
445 /* Finished with the C query string */
446 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
447
448 /* Check the query was processed successfully */
449 assert(qd->DL != NULL);
450 assert(qd->QTL != NULL);
451 assert(qd->TL != NULL);
452
453 /* Record the total number of matching documents */
454 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
455 exc = (*j_env)->ExceptionOccurred(j_env);
456 if (exc) {
457 (*j_env)->ExceptionDescribe(j_env);
458 return;
459 }
460
461 /* Record the matching documents, but only the number requested */
462 printf("Number of matching documents: %d\n", qd->DL->num);
463 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
464 int doc_num = qd->DL->DE[i].DocNum;
465 float doc_weight = qd->DL->DE[i].Weight;
466
467 /* Call the addDoc function (Java side) to record a matching document */
468 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
469 (jlong) doc_num, (jfloat) doc_weight);
470 exc = (*j_env)->ExceptionOccurred(j_env);
471 if (exc) {
472 (*j_env)->ExceptionDescribe(j_env);
473 return;
474 }
475 }
476
477 /* Record the term information, if desired */
478 if (data->queryInfo->needTermFreqs) {
479 /* The following code is a lot more complicated than it could be, but it is necessary
480 to compensate for an oddity in MG. */
481 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
482
483 printf("Number of terms: %d\n", qd->TL->num);
484 printf("Number of query terms: %d\n", qd->QTL->num);
485
486 /* Generate the stemmed form of each of the relevant terms */
487 for (i = 0; i < qd->TL->num; i++) {
488 u_char* raw_term = qd->TL->TE[i].Word;
489 unsigned int term_length = raw_term[0];
490
491 u_char* raw_stemmed_term = malloc(term_length + 1);
492 unsigned int stemmed_term_length;
493
494 /* Copy the term, and stem it */
495 for (j = 0; j <= term_length; j++)
496 raw_stemmed_term[j] = raw_term[j];
497 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
498
499 /* Allocate memory to store the stemmed term, and fill it */
500 stemmed_term_length = raw_stemmed_term[0];
501 stemmed_terms[i] = malloc(stemmed_term_length + 1);
502 assert(stemmed_terms[i] != NULL);
503 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
504 stemmed_terms[i][stemmed_term_length] = '\0';
505 }
506
507 /* Record every query term, along with their equivalent terms */
508 for (i = 0; i < qd->QTL->num; i++) {
509 u_char* raw_query_term = qd->QTL->QTE[i].Term;
510 unsigned int query_term_length = raw_query_term[0];
511 unsigned char* query_term;
512 jstring j_query_term;
513
514 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
515 unsigned int stemmed_query_term_length;
516 unsigned char* stemmed_query_term;
517
518 /* Allocate memory to store the query term, and fill it */
519 query_term = malloc(query_term_length + 1);
520 assert(query_term != NULL);
521 strncpy(query_term, &(raw_query_term[1]), query_term_length);
522 query_term[query_term_length] = '\0';
523
524 /* Allocate a new jstring for the query term */
525 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
526 assert(j_query_term != NULL);
527
528 /* Call the addTerm function (Java side) to record the query term */
529 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
530 j_query_term, (jint) data->defaultStemMethod);
531 exc = (*j_env)->ExceptionOccurred(j_env);
532 if (exc) {
533 (*j_env)->ExceptionDescribe(j_env);
534 return;
535 }
536
537 /* Copy the query term, and stem it */
538 for (j = 0; j <= query_term_length; j++)
539 raw_stemmed_query_term[j] = raw_query_term[j];
540 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
541
542 /* Allocate memory to store the stemmed query term, and fill it */
543 stemmed_query_term_length = raw_stemmed_query_term[0];
544 stemmed_query_term = malloc(stemmed_query_term_length + 1);
545 assert(stemmed_query_term != NULL);
546 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
547 stemmed_query_term[stemmed_query_term_length] = '\0';
548
549 /* Find all the terms equivalent to the query term */
550 for (j = 0; j < qd->TL->num; j++) {
551 /* Check if the stemmed query term matches the stemmed term */
552 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
553 u_char* raw_term = qd->TL->TE[j].Word;
554 unsigned int term_length = raw_term[0];
555 unsigned char* term;
556 jstring j_term;
557
558 /* Allocate memory to store the query term, and fill it */
559 term = malloc(term_length + 1);
560 assert(term != NULL);
561 strncpy(term, &(raw_term[1]), term_length);
562 term[term_length] = '\0';
563
564 /* Allocate a new jstring for the query term */
565 j_term = (*j_env)->NewStringUTF(j_env, term);
566 assert(j_term != NULL);
567
568 /* Call the addEquivTerm function (Java side) to record the equivalent term */
569 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
570 j_query_term, j_term,
571 (jlong) qd->TL->TE[j].WE.doc_count,
572 (jlong) qd->TL->TE[j].WE.count);
573 exc = (*j_env)->ExceptionOccurred(j_env);
574 if (exc) {
575 (*j_env)->ExceptionDescribe(j_env);
576 return;
577 }
578 }
579 }
580 }
581 }
582}
583
584
585/*******************************************
586 set query options
587 *******************************************/
588
589/* Turn casefolding on or off */
590JNIEXPORT void JNICALL
591Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
592 jboolean j_on)
593{
594 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
595
596 if (j_on) {
597 data->defaultStemMethod |= 1;
598 } else {
599 data->defaultStemMethod &= 0xe;
600 }
601}
602
603
604/* Turn stemming on or off */
605JNIEXPORT void JNICALL
606Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
607 jboolean j_on)
608{
609 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
610
611 if (j_on) {
612 data->defaultStemMethod |= 2;
613 } else {
614 data->defaultStemMethod &= 0xd;
615 }
616}
617
618
619/* Set the maximum number of documents to return from a query */
620JNIEXPORT void JNICALL
621Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
622 jint j_max)
623{
624 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
625 data->queryInfo->maxDocs = j_max;
626}
627
628
629/* Turn term frequency recording on or off */
630JNIEXPORT void JNICALL
631Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
632 jboolean j_on)
633{
634 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
635 data->queryInfo->needTermFreqs = j_on;
636}
637
638
639/* Choose MG index to search */
640JNIEXPORT void JNICALL
641Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
642 jstring j_index)
643{
644 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
645
646 /* Get the index name as a C string */
647 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
648 assert(index != NULL);
649 printf("Choosing index %s...\n", index);
650
651 /* Free the previous index name */
652 if (data->queryInfo->index)
653 free(data->queryInfo->index);
654
655 /* Allocate memory for the index name, and fill it */
656 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
657 assert(data->queryInfo->index != NULL);
658 strcpy(data->queryInfo->index, index);
659
660 /* Release the index string */
661 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
662}
663
664
665/* Choose boolean AND or boolean OR queries */
666JNIEXPORT void JNICALL
667Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
668 jint j_mode)
669{
670 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
671 data->defaultBoolCombine = j_mode;
672}
673
674
675/* Get a text representation of the current parameter values */
676JNIEXPORT jstring JNICALL
677Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
678{
679 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
680 char result[512]; /* Assume this is big enough */
681
682 /* Print the data to a character array */
683 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
684 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
685 (data->defaultStemMethod & 1),
686 (data->defaultStemMethod & 2),
687 (data->defaultBoolCombine == 1 ? "all" : "some"),
688 (data->queryInfo->maxDocs));
689
690 /* Convert to a jstring, and return */
691 return (*j_env)->NewStringUTF(j_env, result);
692}
Note: See TracBrowser for help on using the repository browser.