source: main/trunk/greenstone2/common-src/indexers/mg/jni/MGWrapperImpl.c@ 26662

Last change on this file since 26662 was 26662, checked in by davidb, 11 years ago

Support for cross-compilation added. This particular set of changes focuses on flags that assist cross-compilation with JNI, and is comparable to the set of changes made for mgpp. Note the additional type-casting (intptr_t).

  • Property svn:keywords set to Author Date Id Revision
File size: 25.0 KB
Line 
1/*
2 * MGWrapperImpl.c
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <jni.h>
21
22#ifdef __MINGW32__
23
24// Cross compiling for Windows
25// Want the type definitions in *win32* version of jni_md.h but
26// this then leads to C-mangled style functions which we *don't*
27// want. The following achieves this
28
29#undef JNIEXPORT
30#undef JNIIMPORT
31#undef JNICALL
32
33#define JNIEXPORT
34#define JNIIMPORT
35#define JNICALL
36#endif
37
38#include "MGWrapperImpl.h"
39#include "org_greenstone_mg_MGWrapper.h"
40
41
42#include "backend.h"
43#include "environment.h"
44#include "text_get.h"
45#include "stemmer.h"
46
47/*************************************************************************
48 NOTES
49
50 - Features supported by MGPP but not by MG (AFAIK)
51
52 - Sorting results by rank.
53 Done here as a post-processing operation. Could be more efficient in
54 some cases: the current solution is not very good when the number of
55 matching documents is large and the number of desired matches is
56 small. In this case it would be better to iterate through the array
57 picking out the best documents rather than sorting them all.
58
59 - Asking for query term frequencies to be returned.
60 This cannot be turned off in MG. If the term frequencies are not
61 required, they are simply not passed back to the Java side.
62
63 - Choosing the index for queries.
64 It is possible for MG to build Section and Document indexes (for
65 example), but these are two separate indexes, and must be loaded
66 separately. This module can load more than one index at a time, thus
67 processing queries to different indexes more quickly.
68 NOTE: This replaces TWO options in the MGPP version: returnLevel and
69 queryLevel.
70
71 *************************************************************************/
72
73
74#define MAX_INDEXES_CACHED 3
75
76/* copied from mgquery, needed to convert paragraph numbers to document numbers
77 for greenstone */
78#if defined(PARADOCNUM) || defined(NZDL)
79static int GetDocNumFromParaNum(query_data *qd, int paranum) {
80 int Documents = qd->td->cth.num_of_docs;
81 int *Paragraph = qd->paragraph;
82 int low = 1, high = Documents;
83 int mid = (low+high)/2;
84
85 while ((mid = (low+high)/2) >=1 && mid <= Documents)
86 {
87 if (paranum > Paragraph[mid])
88 low = mid+1;
89 else if (paranum <= Paragraph[mid-1])
90 high = mid-1;
91 else
92 return mid;
93 }
94 FatalError(1, "Bad paragraph number.\n");
95 return 0;
96}
97#endif
98
99
100/*********************************************
101 initialisation stuff
102 *********************************************/
103
104/* cached ids for java stuff */
105jfieldID FID_mg_data = NULL; /* MGWrapperData */
106jfieldID FID_query_result = NULL; /* MGQueryResult */
107jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
108jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
109jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
110jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
111jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
112
113
114/* to access objects and methods on java side, need their field/method ids -
115 this initialises them at the start to avoid recalculating them each time they
116 are needed
117Note: the descriptors need to be exactly right, otherwise you get an error
118saying "no such field" but no reference to the fact that it has the right
119name but the wrong type.
120Note: apparently the jclass is a local ref and should only work
121in the method that created it. It seems to work ok, but I'll make it
122global cos the book said I should, and it may avoid future hassles.
123*/
124JNIEXPORT void JNICALL
125Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
126{
127 jclass JC_MGQueryResult;
128
129 /* a long-"J" */
130 FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
131 assert(FID_mg_data != NULL);
132
133 /* an object -"L<class name>;" */
134 FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
135 "Lorg/greenstone/mg/MGQueryResult;");
136 assert(FID_query_result != NULL);
137
138 /* the methods we want to use */
139 JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
140
141 /* addDoc(long doc, float rank) */
142 MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
143 assert(MID_addDoc != NULL);
144
145 /* addTerm(String term, int stem) */
146 MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
147 assert(MID_addTerm != NULL);
148
149 /* addEquivTerm(String term, String equivTerm, long match, long freq) */
150 MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
151 assert(MID_addEquivTerm != NULL);
152
153 /* setTotalDocs(long) */
154 MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
155 assert(MID_setTotalDocs != NULL);
156
157 /* clear(void) */
158 MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
159 assert(MID_clearResult != NULL);
160}
161
162
163/* the java side MGWrapper has a pointer to a C object - MGWrapperData
164 initialise this and set the pointer
165*/
166JNIEXPORT jboolean JNICALL
167Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
168{
169 /* Allocate a MGWrapperData object to store query parameters */
170 MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
171 assert(data != NULL);
172
173 /* Set default values - no stemming, no case-folding, boolean OR queries */
174 data->defaultStemMethod = 0;
175 data->defaultBoolCombine = 0;
176
177 /* Allocate a QueryInfo object to store more query parameters */
178 data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
179 assert(data->queryInfo != NULL);
180
181 /* Set default values - 50 documents max, return term freqs, sort by rank */
182 data->queryInfo->index = NULL;
183 data->queryInfo->maxDocs = 50;
184 data->queryInfo->needTermFreqs = 1;
185
186 /* Save the object on the Java side */
187 (*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
188
189 /* Initialise MG environment variables */
190 InitEnv();
191 SetEnv("expert", "true", NULL);
192 SetEnv("mode", "docnums", NULL);
193
194 return 1; /* true - no errors */
195}
196
197
198/*******************************************
199 Index caching
200 *******************************************/
201
202query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
203
204
205/* Get the index data necessary to perform a query or document retrieval */
206query_data*
207loadIndexData(char* base_dir, char* index_path, char* text_path)
208{
209 char* index_path_name;
210 char* text_path_name;
211 query_data* qd;
212 int i = 0;
213
214 /* Form the path name of the desired indexes */
215 index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
216 assert(index_path_name != NULL);
217 strcpy(index_path_name, base_dir);
218 strcat(index_path_name, index_path);
219 printf("Index pathname: %s\n", index_path_name);
220
221 text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
222 assert(text_path_name != NULL);
223 strcpy(text_path_name, base_dir);
224 strcat(text_path_name, text_path);
225 printf("Text pathname: %s\n", text_path_name);
226
227 /* Search through the cached indexes for the desired one */
228 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
229 printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
230 printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
231
232 /* Check if the index has already been loaded */
233 if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
234 (strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
235 /* Index has already been loaded and cached, so return it */
236 printf("Found index!\n");
237 free(index_path_name);
238 free(text_path_name);
239 return cached_indexes[i];
240 }
241
242 i++;
243 }
244
245 /* Text strings no longer needed */
246 free(index_path_name);
247 free(text_path_name);
248
249 /* The index is not cached, so load it now */
250 qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
251 if (!qd) {
252 printf("Error: Could not InitQuerySystem()...\n");
253 return NULL;
254 }
255
256 /* The index loaded OK, so cache it */
257 /* This could be a little more sophisticated, eg. replace least frequently used index */
258 if (i >= MAX_INDEXES_CACHED)
259 i = MAX_INDEXES_CACHED - 1;
260
261 /* Free the index being replaced */
262 if (cached_indexes[i] != NULL)
263 FinishQuerySystem(cached_indexes[i]);
264
265 /* Cache the loaded index, and return it */
266 cached_indexes[i] = qd;
267 return cached_indexes[i];
268}
269
270
271/* Clean up by unloading all cached indexes */
272JNIEXPORT jboolean JNICALL
273Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
274{
275 /* Free all the loaded indexes */
276 int i = 0;
277 while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
278 FinishQuerySystem(cached_indexes[i]);
279 cached_indexes[i] = NULL;
280 i++;
281 }
282
283 return 1; /* true - no errors */
284}
285
286
287/****************************************************
288 retrieve a document
289 ****************************************************/
290
291/* Returns a document from mg as a string */
292JNIEXPORT jstring JNICALL
293Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
294 jstring j_base_dir, jstring j_text_path,
295 jlong j_docnum)
296{
297 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
298
299 char* index_path;
300 const char* base_dir;
301 const char* text_path;
302 query_data* qd;
303
304 mg_u_long pos, len;
305 u_char* c_buffer = NULL;
306 u_char* uc_buffer = NULL;
307 int ULen;
308
309 jstring result;
310
311 /* Make sure an index has been specified */
312 index_path = data->queryInfo->index;
313 assert(index_path != NULL);
314
315 /* Obtain C versions of the two string parameters */
316 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
317 if (base_dir == NULL) {
318 return NULL;
319 }
320 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
321 if (text_path == NULL) {
322 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
323 return NULL;
324 }
325
326 /* Load the appropriate index for satisfying this request */
327 printf("Document retrieval, index path: %s\n", index_path);
328 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
329
330 /* The C text strings are no longer needed */
331 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
332 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
333
334 /* Check that the index was loaded successfully */
335 if (qd==NULL) {
336 return NULL;
337 }
338 /*assert(qd != NULL);*/
339
340 /* Get the document position and length in the text file */
341 printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
342 FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
343 printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
344
345 /* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
346 c_buffer = (u_char*) malloc(len);
347 assert(c_buffer != NULL);
348 uc_buffer = (u_char*) malloc((int) (qd->td->cth.ratio * 1.01 * len) + 100);
349 assert(uc_buffer != NULL);
350
351 /* Seek to the correct position in the file and read the document text */
352 Fseek (qd->td->TextFile, pos, 0);
353 Fread (c_buffer, 1, len, qd->td->TextFile);
354
355 /* Decompress the document text into another buffer, and terminate it */
356 DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
357 uc_buffer[ULen] = '\0';
358
359 /* Load the document text into a Java string */
360 result = (*j_env)->NewStringUTF(j_env, uc_buffer);
361 assert(result != NULL);
362
363 /* Free C buffers */
364 free(c_buffer);
365 free(uc_buffer);
366
367 /* Return the document text */
368 return result;
369}
370
371
372/*******************************************
373 do a query
374 *******************************************/
375
376/* do the actual query - the results are written to query_result held on the Java side */
377JNIEXPORT void JNICALL
378Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
379 jstring j_base_dir, jstring j_text_path,
380 jstring j_query)
381{
382 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
383
384 char* index_path;
385 const char* base_dir;
386 const char* text_path;
387 query_data* qd;
388
389 jobject result_ptr;
390 char* query;
391 int i, j;
392
393 jthrowable exc;
394 /* First of all, clear the previous result */
395 /* The result to write to */
396 result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
397 assert(result_ptr != NULL);
398
399 /* Clear any previous query results */
400 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
401 exc = (*j_env)->ExceptionOccurred(j_env);
402 if (exc) {
403 (*j_env)->ExceptionDescribe(j_env);
404 return;
405 }
406
407 /* Make sure an index has been specified */
408 index_path = data->queryInfo->index;
409 if (index_path == NULL) {
410 return;
411 }
412
413 /* Obtain C versions of the two string parameters */
414 base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
415 if (base_dir == NULL) {
416 return;
417 }
418 text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
419 if (text_path == NULL) {
420 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
421 return;
422 }
423
424 /* Load the appropriate index for satisfying this request */
425 qd = loadIndexData((char*) base_dir, (char*) index_path, (char*) text_path);
426
427 /* The C text strings are no longer needed */
428 (*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
429 (*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
430
431 /* Check that the index was loaded successfully */
432 if (qd == NULL) {
433 return;
434 }
435
436 /* Remove anything hanging around from last time */
437 FreeQueryDocs(qd);
438
439 /* Obtain a C version of the query string */
440 query = (char*) (*j_env)->GetStringUTFChars(j_env, j_query, NULL);
441 if (query == NULL) {
442 return;
443 }
444 printf("Searching for query \"%s\"...\n", query);
445
446 /* Make sure the query isn't empty */
447 if (strlen(query) == 0) {
448 printf("Warning: Empty query.\n");
449 return;
450 }
451
452 /* "Some" queries are done as ranked queries */
453 if (data->defaultBoolCombine == 0) {
454 RankedQueryInfo rqi;
455 rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
456 rqi.Exact = 1; /* Perform exact ranking */
457 rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
458 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
459 /* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
460 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
461 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
462 }
463
464 rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
465 rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
466 /* rqi.MaxAccums = -1; */ /* Use as many accumulators as necessary - CRASHES with list */
467 rqi.MaxAccums = 100000;
468 rqi.MaxTerms = -1; /* Use all the query terms */
469 /* rqi.StopAtMaxAccum = 0;*/ /* Don't care (using as many accumulators as necessary) */
470 rqi.StopAtMaxAccum = 1;
471 rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
472 rqi.skip_dump = NULL; /* Don't dump skip information */
473
474 /* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
475 SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
476 SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
477
478 RankedQuery(qd, query, &rqi);
479 }
480 /* "All" queries are done as boolean queries */
481 else {
482 BooleanQueryInfo bqi;
483 bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
484
485 /* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
486 BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
487 }
488
489 /* Finished with the C query string */
490 (*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
491
492 /* Check the query was processed successfully */
493 if (qd->DL == NULL || qd->QTL == NULL || qd->TL == NULL) {
494 return;
495 }
496
497 /* Record the total number of matching documents */
498 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
499 exc = (*j_env)->ExceptionOccurred(j_env);
500 if (exc) {
501 (*j_env)->ExceptionDescribe(j_env);
502 return;
503 }
504
505 /* Record the matching documents, but only the number requested */
506 printf("Number of matching documents: %d\n", qd->DL->num);
507
508 for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
509 int doc_num = qd->DL->DE[i].DocNum;
510 float doc_weight = qd->DL->DE[i].Weight;
511
512#if defined(PARADOCNUM) || defined(NZDL)
513 if (qd->id->ifh.InvfLevel == 3) {
514 /* pararaph level, need to convert to doc level*/
515 doc_num = GetDocNumFromParaNum(qd, doc_num);
516 }
517#endif
518
519
520 /* Call the addDoc function (Java side) to record a matching document */
521 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
522 (jlong) doc_num, (jfloat) doc_weight);
523 exc = (*j_env)->ExceptionOccurred(j_env);
524 if (exc) {
525 (*j_env)->ExceptionDescribe(j_env);
526 return;
527 }
528 }
529
530 /* Record the term information, if desired */
531 if (data->queryInfo->needTermFreqs) {
532 /* The following code is a lot more complicated than it could be, but it is necessary
533 to compensate for an oddity in MG. */
534 unsigned char** stemmed_terms = malloc(sizeof(unsigned char*) * qd->TL->num);
535
536 printf("Number of terms: %d\n", qd->TL->num);
537 printf("Number of query terms: %d\n", qd->QTL->num);
538
539 /* Generate the stemmed form of each of the relevant terms */
540 for (i = 0; i < qd->TL->num; i++) {
541 u_char* raw_term = qd->TL->TE[i].Word;
542 unsigned int term_length = raw_term[0];
543
544 u_char* raw_stemmed_term = malloc(term_length + 1);
545 unsigned int stemmed_term_length;
546
547 /* Copy the term, and stem it */
548 for (j = 0; j <= term_length; j++)
549 raw_stemmed_term[j] = raw_term[j];
550 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
551
552 /* Allocate memory to store the stemmed term, and fill it */
553 stemmed_term_length = raw_stemmed_term[0];
554 stemmed_terms[i] = malloc(stemmed_term_length + 1);
555 assert(stemmed_terms[i] != NULL);
556 strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
557 stemmed_terms[i][stemmed_term_length] = '\0';
558 }
559
560 /* Record every query term, along with their equivalent terms */
561 for (i = 0; i < qd->QTL->num; i++) {
562 u_char* raw_query_term = qd->QTL->QTE[i].Term;
563 unsigned int query_term_length = raw_query_term[0];
564 unsigned char* query_term;
565 jstring j_query_term;
566
567 u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
568 unsigned int stemmed_query_term_length;
569 unsigned char* stemmed_query_term;
570
571 /* Allocate memory to store the query term, and fill it */
572 query_term = malloc(query_term_length + 1);
573 assert(query_term != NULL);
574 strncpy(query_term, &(raw_query_term[1]), query_term_length);
575 query_term[query_term_length] = '\0';
576
577 /* Allocate a new jstring for the query term */
578 j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
579 assert(j_query_term != NULL);
580
581 /* Call the addTerm function (Java side) to record the query term */
582 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
583 j_query_term, (jint) data->defaultStemMethod);
584 exc = (*j_env)->ExceptionOccurred(j_env);
585 if (exc) {
586 (*j_env)->ExceptionDescribe(j_env);
587 return;
588 }
589
590 /* Copy the query term, and stem it */
591 for (j = 0; j <= query_term_length; j++)
592 raw_stemmed_query_term[j] = raw_query_term[j];
593 stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
594
595 /* Allocate memory to store the stemmed query term, and fill it */
596 stemmed_query_term_length = raw_stemmed_query_term[0];
597 stemmed_query_term = malloc(stemmed_query_term_length + 1);
598 assert(stemmed_query_term != NULL);
599 strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
600 stemmed_query_term[stemmed_query_term_length] = '\0';
601
602 /* Find all the terms equivalent to the query term */
603 for (j = 0; j < qd->TL->num; j++) {
604 /* Check if the stemmed query term matches the stemmed term */
605 if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
606 u_char* raw_term = qd->TL->TE[j].Word;
607 unsigned int term_length = raw_term[0];
608 unsigned char* term;
609 jstring j_term;
610
611 /* Allocate memory to store the query term, and fill it */
612 term = malloc(term_length + 1);
613 assert(term != NULL);
614 strncpy(term, &(raw_term[1]), term_length);
615 term[term_length] = '\0';
616
617 /* Allocate a new jstring for the query term */
618 j_term = (*j_env)->NewStringUTF(j_env, term);
619 assert(j_term != NULL);
620
621 /* Call the addEquivTerm function (Java side) to record the equivalent term */
622 (*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
623 j_query_term, j_term,
624 (jlong) qd->TL->TE[j].WE.doc_count,
625 (jlong) qd->TL->TE[j].WE.count);
626 exc = (*j_env)->ExceptionOccurred(j_env);
627 if (exc) {
628 (*j_env)->ExceptionDescribe(j_env);
629 return;
630 }
631 }
632 }
633 }
634 }
635}
636
637
638/*******************************************
639 set query options
640 *******************************************/
641
642/* Turn casefolding on or off */
643JNIEXPORT void JNICALL
644Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
645 jboolean j_on)
646{
647 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
648
649 if (j_on) {
650 data->defaultStemMethod |= 1;
651 } else {
652 data->defaultStemMethod &= 0xe;
653 }
654}
655
656
657/* Turn stemming on or off */
658JNIEXPORT void JNICALL
659Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
660 jboolean j_on)
661{
662 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
663
664 if (j_on) {
665 data->defaultStemMethod |= 2;
666 } else {
667 data->defaultStemMethod &= 0xd;
668 }
669}
670
671
672/* Set the maximum number of documents to return from a query */
673JNIEXPORT void JNICALL
674Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
675 jint j_max)
676{
677 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
678 data->queryInfo->maxDocs = j_max;
679}
680
681/* set the maximum number of numeric to split*/
682JNIEXPORT void JNICALL
683Java_org_greenstone_mg_MGWrapper_setMaxNumeric (JNIEnv *j_env,
684 jobject j_obj,
685 jint j_max) {
686
687 char text[20];
688 char* maxnumeric;
689 sprintf(text,"%d",j_max);
690 maxnumeric = text;
691 SetEnv("maxnumeric",maxnumeric, NULL);
692}
693
694
695/* Turn term frequency recording on or off */
696JNIEXPORT void JNICALL
697Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
698 jboolean j_on)
699{
700 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
701 data->queryInfo->needTermFreqs = j_on;
702}
703
704
705/* Choose MG index to search */
706JNIEXPORT void JNICALL
707Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
708 jstring j_index)
709{
710 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
711
712 /* Get the index name as a C string */
713 const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
714 assert(index != NULL);
715 printf("Choosing index %s...\n", index);
716
717 /* Free the previous index name */
718 if (data->queryInfo->index)
719 free(data->queryInfo->index);
720
721 /* Allocate memory for the index name, and fill it */
722 data->queryInfo->index = (char*) malloc(strlen(index) + 1);
723 assert(data->queryInfo->index != NULL);
724 strcpy(data->queryInfo->index, index);
725
726 /* Release the index string */
727 (*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
728}
729
730
731/* Choose boolean AND or boolean OR queries */
732JNIEXPORT void JNICALL
733Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
734 jint j_mode)
735{
736 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
737 data->defaultBoolCombine = j_mode;
738}
739
740
741/* Get a text representation of the current parameter values */
742JNIEXPORT jstring JNICALL
743Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
744{
745 MGWrapperData* data = (MGWrapperData*) (*j_env)->GetLongField(j_env, j_obj, FID_mg_data);
746 char result[512]; /* Assume this is big enough */
747
748 /* Print the data to a character array */
749 sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
750 (data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
751 (data->defaultStemMethod & 1),
752 (data->defaultStemMethod & 2),
753 (data->defaultBoolCombine == 1 ? "all" : "some"),
754 (data->queryInfo->maxDocs));
755
756 /* Convert to a jstring, and return */
757 return (*j_env)->NewStringUTF(j_env, result);
758}
Note: See TracBrowser for help on using the repository browser.