source: trunk/indexers/mg/jni/MGWrapperImpl.c@ 8919

Last change on this file since 8919 was 8919, checked in by kjdon, 19 years ago

added some bits (copied from mgquery) to make paragraph searching work - it wasn't returning doc numbers, but was returning para numbers which are not in the gdbm database

  • Property svn:keywords set to Author Date Id Revision
File size: 24.2 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20
#include "MGWrapperImpl.h"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <jni.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30/*************************************************************************
31 NOTES
32
33 - Features supported by MGPP but not by MG (AFAIK)
34
35 - Sorting results by rank.
36 Done here as a post-processing operation. Could be more efficient in
37 some cases: the current solution is not very good when the number of
38 matching documents is large and the number of desired matches is
39 small. In this case it would be better to iterate through the array
40 picking out the best documents rather than sorting them all.
41
42 - Asking for query term frequencies to be returned.
43 This cannot be turned off in MG. If the term frequencies are not
44 required, they are simply not passed back to the Java side.
45
46 - Choosing the index for queries.
47 It is possible for MG to build Section and Document indexes (for
48 example), but these are two separate indexes, and must be loaded
49 separately. This module can load more than one index at a time, thus
50 processing queries to different indexes more quickly.
51 NOTE: This replaces TWO options in the MGPP version: returnLevel and
52 queryLevel.
53
54 *************************************************************************/
55
56
57#define MAX_INDEXES_CACHED 3
58
/* Copied from mgquery: needed to convert paragraph numbers to document
   numbers for greenstone. */
#if defined(PARADOCNUM) || defined(NZDL)
/*
 * Binary-search qd->paragraph for the document containing paragraph
 * `paranum`.  Document d (1 <= d <= num_of_docs) is the answer when
 *     Paragraph[d-1] < paranum <= Paragraph[d].
 * NOTE(review): this presumes qd->paragraph holds non-decreasing values
 * with a valid entry at index 0 - confirm against the code that loads it.
 *
 * Returns the document number.  If no document matches, FatalError() is
 * called; 0 is returned should that call come back.
 */
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  /* Stop as soon as the search window is empty.  The earlier condition
     only tested that mid stayed within 1..Documents; for a paragraph number
     above Paragraph[Documents] low stuck at Documents+1 and mid at
     Documents, so the loop never ended. */
  while (low <= high)
    {
      /* written this way so low+high cannot overflow */
      int mid = low + (high - low) / 2;

      if (paranum > Paragraph[mid])
        low = mid + 1;
      else if (paranum <= Paragraph[mid - 1])
        high = mid - 1;
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
81
82
83/*********************************************
84 initialisation stuff
85 *********************************************/
86
87/* cached ids for java stuff */
88jfieldID FID_mg_data = NULL; /* MGWrapperData */
89jfieldID FID_query_result = NULL; /* MGQueryResult */
90jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97/* to access objects and methods on java side, need their field/method ids -
98 this initialises them at the start to avoid recalculating them each time they
99 are needed
100Note: the descriptors need to be exactly right, otherwise you get an error
101saying "no such field" but no reference to the fact that it has the right
102name but the wrong type.
103Note: apparently the jclass is a local ref and should only work
104in the method that created it. It seems to work ok, but I'll make it
105global cos the book said I should, and it may avoid future hassles.
106*/
107JNIEXPORT void JNICALL
108Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109{
110 jclass JC_MGQueryResult;
111
112 /* a long-"J" */
113 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114 assert(FID_mg_data != NULL);
115
116 /* an object -"L<class name>;" */
117 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118 "Lorg/greenstone/mg/MGQueryResult;");
119 assert(FID_query_result != NULL);
120
121 /* the methods we want to use */
122 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124 /* addDoc(long doc, float rank) */
125 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126 assert(MID_addDoc != NULL);
127
128 /* addTerm(String term, int stem) */
129 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130 assert(MID_addTerm != NULL);
131
132 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
133 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134 assert(MID_addEquivTerm != NULL);
135
136 /* setTotalDocs(long) */
137 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138 assert(MID_setTotalDocs != NULL);
139
140 /* clear(void) */
141 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142 assert(MID_clearResult != NULL);
143}
144
145
146/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147 initialise this and set the pointer
148*/
149JNIEXPORT jboolean JNICALL
150Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151{
152 /* Allocate a MGWrapperData object to store query parameters */
153 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154 assert(data != NULL);
155
156 /* Set default values - no stemming, no case-folding, boolean OR queries */
157 data->defaultStemMethod = 0;
158 data->defaultBoolCombine = 0;
159
160 /* Allocate a QueryInfo object to store more query parameters */
161 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162 assert(data->queryInfo != NULL);
163
164 /* Set default values - 50 documents max, return term freqs, sort by rank */
165 data->queryInfo->index = NULL;
166 data->queryInfo->maxDocs = 50;
167 data->queryInfo->needTermFreqs = 1;
168
169 /* Save the object on the Java side */
170 (*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172 /* Initialise MG environment variables */
173 InitEnv();
174 SetEnv("expert", "true", NULL);
175 SetEnv("mode", "docnums", NULL);
176 return 1; /* true - no errors */
177}
178
179
180/*******************************************
181 Index caching
182 *******************************************/
183
184query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187/* Get the index data necessary to perform a query or document retrieval */
188query_data*
189loadIndexData(char* base_dir, char* index_path, char* text_path)
190{
191 char* index_path_name;
192 char* text_path_name;
193 query_data* qd;
194 int i = 0;
195
196 /* Form the path name of the desired indexes */
197 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198 assert(index_path_name != NULL);
199 strcpy(index_path_name, base_dir);
200 strcat(index_path_name, index_path);
201 printf("Index pathname: %s\n", index_path_name);
202
203 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204 assert(text_path_name != NULL);
205 strcpy(text_path_name, base_dir);
206 strcat(text_path_name, text_path);
207 printf("Text pathname: %s\n", text_path_name);
208
209 /* Search through the cached indexes for the desired one */
210 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214 /* Check if the index has already been loaded */
215 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217 /* Index has already been loaded and cached, so return it */
218 printf("Found index!\n");
219 free(index_path_name);
220 free(text_path_name);
221 return cached_indexes[i];
222 }
223
224 i++;
225 }
226
227 /* Text strings no longer needed */
228 free(index_path_name);
229 free(text_path_name);
230
231 /* The index is not cached, so load it now */
232 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233 if (!qd) {
234 printf("Error: Could not InitQuerySystem()...\n");
235 return NULL;
236 }
237
238 /* The index loaded OK, so cache it */
239 /* This could be a little more sophisticated, eg. replace least frequently used index */
240 if (i >= MAX_INDEXES_CACHED)
241 i = MAX_INDEXES_CACHED - 1;
242
243 /* Free the index being replaced */
244 if (cached_indexes[i] != NULL)
245 FinishQuerySystem(cached_indexes[i]);
246
247 /* Cache the loaded index, and return it */
248 cached_indexes[i] = qd;
249 return cached_indexes[i];
250}
251
252
253/* Clean up by unloading all cached indexes */
254JNIEXPORT jboolean JNICALL
255Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256{
257 /* Free all the loaded indexes */
258 int i = 0;
259 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260 FinishQuerySystem(cached_indexes[i]);
261 cached_indexes[i] = NULL;
262 i++;
263 }
264
265 return 1; /* true - no errors */
266}
267
268
269/****************************************************
270 retrieve a document
271 ****************************************************/
272
273/* Returns a document from mg as a string */
274JNIEXPORT jstring JNICALL
275Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276 jstring j_base_dir, jstring j_text_path,
277 jlong j_docnum)
278{
279 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281 char* index_path;
282 const char* base_dir;
283 const char* text_path;
284 query_data* qd;
285
286 u_long pos, len;
287 u_char* c_buffer = NULL;
288 u_char* uc_buffer = NULL;
289 int ULen;
290
291 jstring result;
292
293 /* Make sure an index has been specified */
294 index_path = data->queryInfo->index;
295 assert(index_path != NULL);
296
297 /* Obtain C versions of the two string parameters */
298 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299 if (base_dir == NULL) {
300 return NULL;
301 }
302 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303 if (text_path == NULL) {
304 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305 return NULL;
306 }
307
308 /* Load the appropriate index for satisfying this request */
309 printf("Document retrieval, index path: %s\n", index_path);
310 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
311
312 /* The C text strings are no longer needed */
313 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316 /* Check that the index was loaded successfully */
317 assert(qd != NULL);
318
319 /* Get the document position and length in the text file */
320 printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
321 FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
322 printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
323
324 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
325 c_buffer = (u_char*) malloc(len);
326 assert(c_buffer != NULL);
327 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
328 assert(uc_buffer != NULL);
329
330 /* Seek to the correct position in the file and read the document text */
331 Fseek (qd->td->TextFile, pos, 0);
332 Fread (c_buffer, 1, len, qd->td->TextFile);
333
334 /* Decompress the document text into another buffer, and terminate it */
335 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
336 uc_buffer[ULen] = '\0';
337
338 /* Load the document text into a Java string */
339 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
340 assert(result != NULL);
341
342 /* Free C buffers */
343 free(c_buffer);
344 free(uc_buffer);
345
346 /* Return the document text */
347 return result;
348}
349
350
351/*******************************************
352 do a query
353 *******************************************/
354
355/* do the actual query - the results are written to query_result held on the Java side */
356JNIEXPORT void JNICALL
357Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
358 jstring j_base_dir, jstring j_text_path,
359 jstring j_query)
360{
361 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
362
363 char* index_path;
364 const char* base_dir;
365 const char* text_path;
366 query_data* qd;
367
368 jobject result_ptr;
369 char* query;
370 int i, j;
371
372 jthrowable exc;
373
374 /* Make sure an index has been specified */
375 index_path = data->queryInfo->index;
376 assert(index_path != NULL);
377
378 /* Obtain C versions of the two string parameters */
379 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
380 if (base_dir == NULL) {
381 return;
382 }
383 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
384 if (text_path == NULL) {
385 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
386 return;
387 }
388
389 /* Load the appropriate index for satisfying this request */
390 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
391
392 /* The C text strings are no longer needed */
393 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
394 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
395
396 /* Check that the index was loaded successfully */
397 assert(qd != NULL);
398
399 /* Remove anything hanging around from last time */
400 FreeQueryDocs(qd);
401
402 /* The result to write to */
403 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
404 assert(result_ptr != NULL);
405
406 /* Clear any previous query results */
407 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
408 exc = (*j_env)->ExceptionOccurred(j_env);
409 if (exc) {
410 (*j_env)->ExceptionDescribe(j_env);
411 return;
412 }
413
414 /* Obtain a C version of the query string */
415 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
416 assert(query != NULL);
417 printf("Searching for query \"%s\"...\n", query);
418
419 /* Make sure the query isn't empty */
420 if (strlen(query) == 0) {
421 printf("Warning: Empty query.\n");
422 return;
423 }
424
425 /* "Some" queries are done as ranked queries */
426 if (data->defaultBoolCombine == 0) {
427 RankedQueryInfo rqi;
428 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
429 rqi.Exact = 1; /* Perform exact ranking */
430 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
431 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
432 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
433 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
434 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
435 }
436
437 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
438 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
439 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
440 rqi.MaxAccums = 100000;
441 rqi.MaxTerms = -1; /* Use all the query terms */
442 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
443 rqi.StopAtMaxAccum = 1;
444 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
445 rqi.skip_dump = NULL; /* Don't dump skip information */
446
447 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
448 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
449 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
450
451 RankedQuery(qd, query, &rqi);
452 }
453 /* "All" queries are done as boolean queries */
454 else {
455 BooleanQueryInfo bqi;
456 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
457
458 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
459 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
460 }
461
462 /* Finished with the C query string */
463 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
464
465 /* Check the query was processed successfully */
466 assert(qd->DL != NULL);
467 assert(qd->QTL != NULL);
468 assert(qd->TL != NULL);
469
470 /* Record the total number of matching documents */
471 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
472 exc = (*j_env)->ExceptionOccurred(j_env);
473 if (exc) {
474 (*j_env)->ExceptionDescribe(j_env);
475 return;
476 }
477
478 /* Record the matching documents, but only the number requested */
479 printf("Number of matching documents: %d\n", qd->DL->num);
480
481 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
482 int doc_num = qd->DL->DE[i].DocNum;
483#if defined(PARADOCNUM) || defined(NZDL)
484 if (qd->id->ifh.InvfLevel == 3) {
485 /* pararaph level, need to convert to doc level*/
486 doc_num = GetDocNumFromParaNum(qd, doc_num);
487 }
488#endif
489 float doc_weight = qd->DL->DE[i].Weight;
490
491 /* Call the addDoc function (Java side) to record a matching document */
492 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
493 (jlong) doc_num, (jfloat) doc_weight);
494 exc = (*j_env)->ExceptionOccurred(j_env);
495 if (exc) {
496 (*j_env)->ExceptionDescribe(j_env);
497 return;
498 }
499 }
500
501 /* Record the term information, if desired */
502 if (data->queryInfo->needTermFreqs) {
503 /* The following code is a lot more complicated than it could be, but it is necessary
504 to compensate for an oddity in MG. */
505 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
506
507 printf("Number of terms: %d\n", qd->TL->num);
508 printf("Number of query terms: %d\n", qd->QTL->num);
509
510 /* Generate the stemmed form of each of the relevant terms */
511 for (i = 0; i < qd->TL->num; i++) {
512 u_char* raw_term = qd->TL->TE[i].Word;
513 unsigned int term_length = raw_term[0];
514
515 u_char* raw_stemmed_term = malloc(term_length + 1);
516 unsigned int stemmed_term_length;
517
518 /* Copy the term, and stem it */
519 for (j = 0; j <= term_length; j++)
520 raw_stemmed_term[j] = raw_term[j];
521 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
522
523 /* Allocate memory to store the stemmed term, and fill it */
524 stemmed_term_length = raw_stemmed_term[0];
525 stemmed_terms[i] = malloc(stemmed_term_length + 1);
526 assert(stemmed_terms[i] != NULL);
527 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
528 stemmed_terms[i][stemmed_term_length] = '\0';
529 }
530
531 /* Record every query term, along with their equivalent terms */
532 for (i = 0; i < qd->QTL->num; i++) {
533 u_char* raw_query_term = qd->QTL->QTE[i].Term;
534 unsigned int query_term_length = raw_query_term[0];
535 unsigned char* query_term;
536 jstring j_query_term;
537
538 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
539 unsigned int stemmed_query_term_length;
540 unsigned char* stemmed_query_term;
541
542 /* Allocate memory to store the query term, and fill it */
543 query_term = malloc(query_term_length + 1);
544 assert(query_term != NULL);
545 strncpy(query_term, &(raw_query_term[1]), query_term_length);
546 query_term[query_term_length] = '\0';
547
548 /* Allocate a new jstring for the query term */
549 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
550 assert(j_query_term != NULL);
551
552 /* Call the addTerm function (Java side) to record the query term */
553 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
554 j_query_term, (jint) data->defaultStemMethod);
555 exc = (*j_env)->ExceptionOccurred(j_env);
556 if (exc) {
557 (*j_env)->ExceptionDescribe(j_env);
558 return;
559 }
560
561 /* Copy the query term, and stem it */
562 for (j = 0; j <= query_term_length; j++)
563 raw_stemmed_query_term[j] = raw_query_term[j];
564 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
565
566 /* Allocate memory to store the stemmed query term, and fill it */
567 stemmed_query_term_length = raw_stemmed_query_term[0];
568 stemmed_query_term = malloc(stemmed_query_term_length + 1);
569 assert(stemmed_query_term != NULL);
570 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
571 stemmed_query_term[stemmed_query_term_length] = '\0';
572
573 /* Find all the terms equivalent to the query term */
574 for (j = 0; j < qd->TL->num; j++) {
575 /* Check if the stemmed query term matches the stemmed term */
576 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
577 u_char* raw_term = qd->TL->TE[j].Word;
578 unsigned int term_length = raw_term[0];
579 unsigned char* term;
580 jstring j_term;
581
582 /* Allocate memory to store the query term, and fill it */
583 term = malloc(term_length + 1);
584 assert(term != NULL);
585 strncpy(term, &(raw_term[1]), term_length);
586 term[term_length] = '\0';
587
588 /* Allocate a new jstring for the query term */
589 j_term = (*j_env)->NewStringUTF(j_env, term);
590 assert(j_term != NULL);
591
592 /* Call the addEquivTerm function (Java side) to record the equivalent term */
593 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
594 j_query_term, j_term,
595 (jlong) qd->TL->TE[j].WE.doc_count,
596 (jlong) qd->TL->TE[j].WE.count);
597 exc = (*j_env)->ExceptionOccurred(j_env);
598 if (exc) {
599 (*j_env)->ExceptionDescribe(j_env);
600 return;
601 }
602 }
603 }
604 }
605 }
606}
607
608
609/*******************************************
610 set query options
611 *******************************************/
612
613/* Turn casefolding on or off */
614JNIEXPORT void JNICALL
615Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
616 jboolean j_on)
617{
618 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
619
620 if (j_on) {
621 data->defaultStemMethod |= 1;
622 } else {
623 data->defaultStemMethod &= 0xe;
624 }
625}
626
627
628/* Turn stemming on or off */
629JNIEXPORT void JNICALL
630Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
631 jboolean j_on)
632{
633 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
634
635 if (j_on) {
636 data->defaultStemMethod |= 2;
637 } else {
638 data->defaultStemMethod &= 0xd;
639 }
640}
641
642
643/* Set the maximum number of documents to return from a query */
644JNIEXPORT void JNICALL
645Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
646 jint j_max)
647{
648 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
649 data->queryInfo->maxDocs = j_max;
650}
651
652
653/* Turn term frequency recording on or off */
654JNIEXPORT void JNICALL
655Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
656 jboolean j_on)
657{
658 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
659 data->queryInfo->needTermFreqs = j_on;
660}
661
662
663/* Choose MG index to search */
664JNIEXPORT void JNICALL
665Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
666 jstring j_index)
667{
668 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
669
670 /* Get the index name as a C string */
671 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
672 assert(index != NULL);
673 printf("Choosing index %s...\n", index);
674
675 /* Free the previous index name */
676 if (data->queryInfo->index)
677 free(data->queryInfo->index);
678
679 /* Allocate memory for the index name, and fill it */
680 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
681 assert(data->queryInfo->index != NULL);
682 strcpy(data->queryInfo->index, index);
683
684 /* Release the index string */
685 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
686}
687
688
689/* Choose boolean AND or boolean OR queries */
690JNIEXPORT void JNICALL
691Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
692 jint j_mode)
693{
694 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
695 data->defaultBoolCombine = j_mode;
696}
697
698
699/* Get a text representation of the current parameter values */
700JNIEXPORT jstring JNICALL
701Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
702{
703 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetIntField(j_env, j_obj, FID_mg_data);
704 char result[512]; /* Assume this is big enough */
705
706 /* Print the data to a character array */
707 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
708 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
709 (data->defaultStemMethod & 1),
710 (data->defaultStemMethod & 2),
711 (data->defaultBoolCombine == 1 ? "all" : "some"),
712 (data->queryInfo->maxDocs));
713
714 /* Convert to a jstring, and return */
715 return (*j_env)->NewStringUTF(j_env, result);
716}
Note: See TracBrowser for help on using the repository browser.