source: trunk/indexers/mg/jni/MGWrapperImpl.c@ 8920

Last change on this file since 8920 was 8920, checked in by nzdl, 19 years ago

had to move the float doc_weight up so that it compiled on other machines

  • Property svn:keywords set to Author Date Id Revision
File size: 24.2 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"
#include <jni.h>
#include <stdint.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30/*************************************************************************
31 NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35 - Sorting results by rank.
36 Done here as a post-processing operation. Could be more efficient in
37 some cases: the current solution is not very good when the number of
38 matching documents is large and the number of desired matches is
39 small. In this case it would be better to iterate through the array
40 picking out the best documents rather than sorting them all.
41
42 - Asking for query term frequencies to be returned.
43 This cannot be turned off in MG. If the term frequencies are not
44 required, they are simply not passed back to the Java side.
45
46 - Choosing the index for queries.
47 It is possible for MG to build Section and Document indexes (for
48 example), but these are two separate indexes, and must be loaded
49 separately. This module can load more than one index at a time, thus
50 processing queries to different indexes more quickly.
51 NOTE: This replaces TWO options in the MGPP version: returnLevel and
52 queryLevel.
53
54 *************************************************************************/
55
56
57#define MAX_INDEXES_CACHED 3
58
59/* copied from mgquery, needed to convert paragraph numbers to document numbers
60 for greenstone */
61#if defined(PARADOCNUM) || defined(NZDL)
62static int GetDocNumFromParaNum(query_data *qd, int paranum) {
63 int Documents = qd->td->cth.num_of_docs;
64 int *Paragraph = qd->paragraph;
65 int low = 1, high = Documents;
66 int mid = (low+high)/2;
67
68 while ((mid = (low+high)/2) >=1 && mid <= Documents)
69 {
70 if (paranum > Paragraph[mid])
71 low = mid+1;
72 else if (paranum <= Paragraph[mid-1])
73 high = mid-1;
74 else
75 return mid;
76 }
77 FatalError(1, "Bad paragraph number.\n");
78 return 0;
79}
80#endif
81
82
83/*********************************************
84 initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110 jclass JC_MGQueryResult;
111
112 /* a long-"J" */
113 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114 assert(FID_mg_data != NULL);
115
116 /* an object -"L<class name>;" */
117 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118 "Lorg/greenstone/mg/MGQueryResult;");
119 assert(FID_query_result != NULL);
120
121 /* the methods we want to use */
122 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124 /* addDoc(long doc, float rank) */
125 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126 assert(MID_addDoc != NULL);
127
128 /* addTerm(String term, int stem) */
129 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130 assert(MID_addTerm != NULL);
131
132 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134 assert(MID_addEquivTerm != NULL);
135
136 /* setTotalDocs(long) */
137 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138 assert(MID_setTotalDocs != NULL);
139
140 /* clear(void) */
141 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142 assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147 initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152 /* Allocate a MGWrapperData object to store query parameters */
153 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154 assert(data != NULL);
155
156 /* Set default values - no stemming, no case-folding, boolean OR queries */
157 data->defaultStemMethod = 0;
158 data->defaultBoolCombine = 0;
159
160 /* Allocate a QueryInfo object to store more query parameters */
161 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162 assert(data->queryInfo != NULL);
163
164 /* Set default values - 50 documents max, return term freqs, sort by rank */
165 data->queryInfo->index = NULL;
166 data->queryInfo->maxDocs = 50;
167 data->queryInfo->needTermFreqs = 1;
168
169 /* Save the object on the Java side */
170 (*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172 /* Initialise MG environment variables */
173 InitEnv();
174 SetEnv("expert", "true", NULL);
175 SetEnv("mode", "docnums", NULL);
176 return 1; /* true - no errors */
177}
178
179
180/*******************************************
181 Index caching
182 *******************************************/
183
184query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187/* Get the index data necessary to perform a query or document retrieval */
188query_data*
189loadIndexData(char* base_dir, char* index_path, char* text_path)
190{
191 char* index_path_name;
192 char* text_path_name;
193 query_data* qd;
194 int i = 0;
195
196 /* Form the path name of the desired indexes */
197 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198 assert(index_path_name != NULL);
199 strcpy(index_path_name, base_dir);
200 strcat(index_path_name, index_path);
201 printf("Index pathname: %s\n", index_path_name);
202
203 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204 assert(text_path_name != NULL);
205 strcpy(text_path_name, base_dir);
206 strcat(text_path_name, text_path);
207 printf("Text pathname: %s\n", text_path_name);
208
209 /* Search through the cached indexes for the desired one */
210 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214 /* Check if the index has already been loaded */
215 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217 /* Index has already been loaded and cached, so return it */
218 printf("Found index!\n");
219 free(index_path_name);
220 free(text_path_name);
221 return cached_indexes[i];
222 }
223
224 i++;
225 }
226
227 /* Text strings no longer needed */
228 free(index_path_name);
229 free(text_path_name);
230
231 /* The index is not cached, so load it now */
232 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233 if (!qd) {
234 printf("Error: Could not InitQuerySystem()...\n");
235 return NULL;
236 }
237
238 /* The index loaded OK, so cache it */
239 /* This could be a little more sophisticated, eg. replace least frequently used index */
240 if (i >= MAX_INDEXES_CACHED)
241 i = MAX_INDEXES_CACHED - 1;
242
243 /* Free the index being replaced */
244 if (cached_indexes[i] != NULL)
245 FinishQuerySystem(cached_indexes[i]);
246
247 /* Cache the loaded index, and return it */
248 cached_indexes[i] = qd;
249 return cached_indexes[i];
250}
251
252
253/* Clean up by unloading all cached indexes */
254JNIEXPORT jboolean JNICALL
255Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256{
257 /* Free all the loaded indexes */
258 int i = 0;
259 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260 FinishQuerySystem(cached_indexes[i]);
261 cached_indexes[i] = NULL;
262 i++;
263 }
264
265 return 1; /* true - no errors */
266}
267
268
269/****************************************************
270 retrieve a document
271 ****************************************************/
272
273/* Returns a document from mg as a string */
274JNIEXPORT jstring JNICALL
275Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276 jstring j_base_dir, jstring j_text_path,
277 jlong j_docnum)
278{
279 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281 char* index_path;
282 const char* base_dir;
283 const char* text_path;
284 query_data* qd;
285
286 u_long pos, len;
287 u_char* c_buffer = NULL;
288 u_char* uc_buffer = NULL;
289 int ULen;
290
291 jstring result;
292
293 /* Make sure an index has been specified */
294 index_path = data->queryInfo->index;
295 assert(index_path != NULL);
296
297 /* Obtain C versions of the two string parameters */
298 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299 if (base_dir == NULL) {
300 return NULL;
301 }
302 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303 if (text_path == NULL) {
304 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305 return NULL;
306 }
307
308 /* Load the appropriate index for satisfying this request */
309 printf("Document retrieval, index path: %s\n", index_path);
310 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
311
312 /* The C text strings are no longer needed */
313 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316 /* Check that the index was loaded successfully */
317 assert(qd != NULL);
318
319 /* Get the document position and length in the text file */
320 printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
321 FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
322 printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
323
324 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
325 c_buffer = (u_char*) malloc(len);
326 assert(c_buffer != NULL);
327 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
328 assert(uc_buffer != NULL);
329
330 /* Seek to the correct position in the file and read the document text */
331 Fseek (qd->td->TextFile, pos, 0);
332 Fread (c_buffer, 1, len, qd->td->TextFile);
333
334 /* Decompress the document text into another buffer, and terminate it */
335 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
336 uc_buffer[ULen] = '\0';
337
338 /* Load the document text into a Java string */
339 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
340 assert(result != NULL);
341
342 /* Free C buffers */
343 free(c_buffer);
344 free(uc_buffer);
345
346 /* Return the document text */
347 return result;
348}
349
350
351/*******************************************
352 do a query
353 *******************************************/
354
355/* do the actual query - the results are written to query_result held on the Java side */
356JNIEXPORT void JNICALL
357Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
358 jstring j_base_dir, jstring j_text_path,
359 jstring j_query)
360{
361 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
362
363 char* index_path;
364 const char* base_dir;
365 const char* text_path;
366 query_data* qd;
367
368 jobject result_ptr;
369 char* query;
370 int i, j;
371
372 jthrowable exc;
373
374 /* Make sure an index has been specified */
375 index_path = data->queryInfo->index;
376 assert(index_path != NULL);
377
378 /* Obtain C versions of the two string parameters */
379 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
380 if (base_dir == NULL) {
381 return;
382 }
383 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
384 if (text_path == NULL) {
385 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
386 return;
387 }
388
389 /* Load the appropriate index for satisfying this request */
390 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
391
392 /* The C text strings are no longer needed */
393 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
394 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
395
396 /* Check that the index was loaded successfully */
397 assert(qd != NULL);
398
399 /* Remove anything hanging around from last time */
400 FreeQueryDocs(qd);
401
402 /* The result to write to */
403 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
404 assert(result_ptr != NULL);
405
406 /* Clear any previous query results */
407 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
408 exc = (*j_env)->ExceptionOccurred(j_env);
409 if (exc) {
410 (*j_env)->ExceptionDescribe(j_env);
411 return;
412 }
413
414 /* Obtain a C version of the query string */
415 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
416 assert(query != NULL);
417 printf("Searching for query \"%s\"...\n", query);
418
419 /* Make sure the query isn't empty */
420 if (strlen(query) == 0) {
421 printf("Warning: Empty query.\n");
422 return;
423 }
424
425 /* "Some" queries are done as ranked queries */
426 if (data->defaultBoolCombine == 0) {
427 RankedQueryInfo rqi;
428 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
429 rqi.Exact = 1; /* Perform exact ranking */
430 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
431 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
432 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
433 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
434 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
435 }
436
437 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
438 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
439 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
440 rqi.MaxAccums = 100000;
441 rqi.MaxTerms = -1; /* Use all the query terms */
442 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
443 rqi.StopAtMaxAccum = 1;
444 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
445 rqi.skip_dump = NULL; /* Don't dump skip information */
446
447 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
448 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
449 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
450
451 RankedQuery(qd, query, &rqi);
452 }
453 /* "All" queries are done as boolean queries */
454 else {
455 BooleanQueryInfo bqi;
456 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
457
458 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
459 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
460 }
461
462 /* Finished with the C query string */
463 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
464
465 /* Check the query was processed successfully */
466 assert(qd->DL != NULL);
467 assert(qd->QTL != NULL);
468 assert(qd->TL != NULL);
469
470 /* Record the total number of matching documents */
471 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
472 exc = (*j_env)->ExceptionOccurred(j_env);
473 if (exc) {
474 (*j_env)->ExceptionDescribe(j_env);
475 return;
476 }
477
478 /* Record the matching documents, but only the number requested */
479 printf("Number of matching documents: %d\n", qd->DL->num);
480
481 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
482 int doc_num = qd->DL->DE[i].DocNum;
483 float doc_weight = qd->DL->DE[i].Weight;
484
485#if defined(PARADOCNUM) || defined(NZDL)
486 if (qd->id->ifh.InvfLevel == 3) {
487 /* pararaph level, need to convert to doc level*/
488 doc_num = GetDocNumFromParaNum(qd, doc_num);
489 }
490#endif
491
492
493 /* Call the addDoc function (Java side) to record a matching document */
494 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
495 (jlong) doc_num, (jfloat) doc_weight);
496 exc = (*j_env)->ExceptionOccurred(j_env);
497 if (exc) {
498 (*j_env)->ExceptionDescribe(j_env);
499 return;
500 }
501 }
502
503 /* Record the term information, if desired */
504 if (data->queryInfo->needTermFreqs) {
505 /* The following code is a lot more complicated than it could be, but it is necessary
506 to compensate for an oddity in MG. */
507 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
508
509 printf("Number of terms: %d\n", qd->TL->num);
510 printf("Number of query terms: %d\n", qd->QTL->num);
511
512 /* Generate the stemmed form of each of the relevant terms */
513 for (i = 0; i < qd->TL->num; i++) {
514 u_char* raw_term = qd->TL->TE[i].Word;
515 unsigned int term_length = raw_term[0];
516
517 u_char* raw_stemmed_term = malloc(term_length + 1);
518 unsigned int stemmed_term_length;
519
520 /* Copy the term, and stem it */
521 for (j = 0; j <= term_length; j++)
522 raw_stemmed_term[j] = raw_term[j];
523 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
524
525 /* Allocate memory to store the stemmed term, and fill it */
526 stemmed_term_length = raw_stemmed_term[0];
527 stemmed_terms[i] = malloc(stemmed_term_length + 1);
528 assert(stemmed_terms[i] != NULL);
529 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
530 stemmed_terms[i][stemmed_term_length] = '\0';
531 }
532
533 /* Record every query term, along with their equivalent terms */
534 for (i = 0; i < qd->QTL->num; i++) {
535 u_char* raw_query_term = qd->QTL->QTE[i].Term;
536 unsigned int query_term_length = raw_query_term[0];
537 unsigned char* query_term;
538 jstring j_query_term;
539
540 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
541 unsigned int stemmed_query_term_length;
542 unsigned char* stemmed_query_term;
543
544 /* Allocate memory to store the query term, and fill it */
545 query_term = malloc(query_term_length + 1);
546 assert(query_term != NULL);
547 strncpy(query_term, &(raw_query_term[1]), query_term_length);
548 query_term[query_term_length] = '\0';
549
550 /* Allocate a new jstring for the query term */
551 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
552 assert(j_query_term != NULL);
553
554 /* Call the addTerm function (Java side) to record the query term */
555 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
556 j_query_term, (jint) data->defaultStemMethod);
557 exc = (*j_env)->ExceptionOccurred(j_env);
558 if (exc) {
559 (*j_env)->ExceptionDescribe(j_env);
560 return;
561 }
562
563 /* Copy the query term, and stem it */
564 for (j = 0; j <= query_term_length; j++)
565 raw_stemmed_query_term[j] = raw_query_term[j];
566 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
567
568 /* Allocate memory to store the stemmed query term, and fill it */
569 stemmed_query_term_length = raw_stemmed_query_term[0];
570 stemmed_query_term = malloc(stemmed_query_term_length + 1);
571 assert(stemmed_query_term != NULL);
572 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
573 stemmed_query_term[stemmed_query_term_length] = '\0';
574
575 /* Find all the terms equivalent to the query term */
576 for (j = 0; j < qd->TL->num; j++) {
577 /* Check if the stemmed query term matches the stemmed term */
578 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
579 u_char* raw_term = qd->TL->TE[j].Word;
580 unsigned int term_length = raw_term[0];
581 unsigned char* term;
582 jstring j_term;
583
584 /* Allocate memory to store the query term, and fill it */
585 term = malloc(term_length + 1);
586 assert(term != NULL);
587 strncpy(term, &(raw_term[1]), term_length);
588 term[term_length] = '\0';
589
590 /* Allocate a new jstring for the query term */
591 j_term = (*j_env)->NewStringUTF(j_env, term);
592 assert(j_term != NULL);
593
594 /* Call the addEquivTerm function (Java side) to record the equivalent term */
595 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
596 j_query_term, j_term,
597 (jlong) qd->TL->TE[j].WE.doc_count,
598 (jlong) qd->TL->TE[j].WE.count);
599 exc = (*j_env)->ExceptionOccurred(j_env);
600 if (exc) {
601 (*j_env)->ExceptionDescribe(j_env);
602 return;
603 }
604 }
605 }
606 }
607 }
608}
609
610
611/*******************************************
612 set query options
613 *******************************************/
614
615/* Turn casefolding on or off */
616JNIEXPORT void JNICALL
617Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
618 jboolean j_on)
619{
620 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
621
622 if (j_on) {
623 data->defaultStemMethod |= 1;
624 } else {
625 data->defaultStemMethod &= 0xe;
626 }
627}
628
629
630/* Turn stemming on or off */
631JNIEXPORT void JNICALL
632Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
633 jboolean j_on)
634{
635 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
636
637 if (j_on) {
638 data->defaultStemMethod |= 2;
639 } else {
640 data->defaultStemMethod &= 0xd;
641 }
642}
643
644
645/* Set the maximum number of documents to return from a query */
646JNIEXPORT void JNICALL
647Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
648 jint j_max)
649{
650 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
651 data->queryInfo->maxDocs = j_max;
652}
653
654
655/* Turn term frequency recording on or off */
656JNIEXPORT void JNICALL
657Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
658 jboolean j_on)
659{
660 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
661 data->queryInfo->needTermFreqs = j_on;
662}
663
664
665/* Choose MG index to search */
666JNIEXPORT void JNICALL
667Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
668 jstring j_index)
669{
670 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
671
672 /* Get the index name as a C string */
673 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
674 assert(index != NULL);
675 printf("Choosing index %s...\n", index);
676
677 /* Free the previous index name */
678 if (data->queryInfo->index)
679 free(data->queryInfo->index);
680
681 /* Allocate memory for the index name, and fill it */
682 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
683 assert(data->queryInfo->index != NULL);
684 strcpy(data->queryInfo->index, index);
685
686 /* Release the index string */
687 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
688}
689
690
691/* Choose boolean AND or boolean OR queries */
692JNIEXPORT void JNICALL
693Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
694 jint j_mode)
695{
696 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
697 data->defaultBoolCombine = j_mode;
698}
699
700
701/* Get a text representation of the current parameter values */
702JNIEXPORT jstring JNICALL
703Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
704{
705 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
706 char result[512]; /* Assume this is big enough */
707
708 /* Print the data to a character array */
709 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
710 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
711 (data->defaultStemMethod & 1),
712 (data->defaultStemMethod & 2),
713 (data->defaultBoolCombine == 1 ? "all" : "some"),
714 (data->queryInfo->maxDocs));
715
716 /* Convert to a jstring, and return */
717 return (*j_env)->NewStringUTF(j_env, result);
718}
Note: See TracBrowser for help on using the repository browser.