Context Navigation

source: trunk/indexers/mg/jni/MGWrapperImpl.c@ 8920

Last change on this file since 8920 was 8920, checked in by nzdl, 19 years ago
had to move the float doc_weight up so that it compiled on other machines
Property svn:keywords set to `Author Date Id Revision`
File size: 24.2 KB

Line
1	/*
2	* MGWrapperImpl.c
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19
20
#include "MGWrapperImpl.h"

#include <jni.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30	/*************************************************************************
31	NOTES
32
33	- Features supported by MGPP but not by MG (AFAIK)
34
35	- Sorting results by rank.
36	Done here as a post-processing operation. Could be more efficient in
37	some cases: the current solution is not very good when the number of
38	matching documents is large and the number of desired matches is
39	small. In this case it would be better to iterate through the array
40	picking out the best documents rather than sorting them all.
41
42	- Asking for query term frequencies to be returned.
43	This cannot be turned off in MG. If the term frequencies are not
44	required, they are simply not passed back to the Java side.
45
46	- Choosing the index for queries.
47	It is possible for MG to build Section and Document indexes (for
48	example), but these are two separate indexes, and must be loaded
49	separately. This module can load more than one index at a time, thus
50	processing queries to different indexes more quickly.
51	NOTE: This replaces TWO options in the MGPP version: returnLevel and
52	queryLevel.
53
54	*************************************************************************/
55
56
57	#define MAX_INDEXES_CACHED 3
58
/* Copied from mgquery: converts a paragraph number into a document number
   for greenstone.

   Binary-searches qd->paragraph over the document range 1..num_of_docs for
   the document d satisfying  Paragraph[d-1] < paranum <= Paragraph[d].
   (Presumably Paragraph[d] holds the number of the last paragraph of
   document d - confirm against the MG headers.)

   Returns the document number.  If no document matches, FatalError() is
   called and, should that return, 0 is returned.

   The loop is bounded by low <= high.  The previous form only tested that
   mid was inside 1..Documents, which never terminated when paranum was
   larger than the last entry (low stuck at Documents+1, mid at Documents). */
#if defined(PARADOCNUM) || defined(NZDL)
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  while (low <= high)
    {
      /* low <= mid <= high, so mid always stays inside 1..Documents */
      int mid = low + (high - low) / 2;

      if (paranum > Paragraph[mid])
        low = mid + 1;
      else if (paranum <= Paragraph[mid - 1])
        high = mid - 1;
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
81
82
83	/*********************************************
84	initialisation stuff
85	*********************************************/
86
87	/* cached ids for java stuff */
88	jfieldID FID_mg_data = NULL; /* MGWrapperData */
89	jfieldID FID_query_result = NULL; /* MGQueryResult */
90	jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91	jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92	jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93	jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94	jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97	/* to access objects and methods on java side, need their field/method ids -
98	this initialises them at the start to avoid recalculating them each time they
99	are needed
100	Note: the descriptors need to be exactly right, otherwise you get an error
101	saying "no such field" but no reference to the fact that it has the right
102	name but the wrong type.
103	Note: apparently the jclass is a local ref and should only work
104	in the method that created it. It seems to work ok, but I'll make it
105	global cos the book said I should, and it may avoid future hassles.
106	*/
107	JNIEXPORT void JNICALL
108	Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109	{
110	jclass JC_MGQueryResult;
111
112	/* a long-"J" */
113	FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114	assert(FID_mg_data != NULL);
115
116	/* an object -"L<class name>;" */
117	FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118	"Lorg/greenstone/mg/MGQueryResult;");
119	assert(FID_query_result != NULL);
120
121	/* the methods we want to use */
122	JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124	/* addDoc(long doc, float rank) */
125	MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126	assert(MID_addDoc != NULL);
127
128	/* addTerm(String term, int stem) */
129	MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130	assert(MID_addTerm != NULL);
131
132	/* addEquivTerm(String term, String equivTerm, long match, long freq) */
133	MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134	assert(MID_addEquivTerm != NULL);
135
136	/* setTotalDocs(long) */
137	MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138	assert(MID_setTotalDocs != NULL);
139
140	/* clear(void) */
141	MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142	assert(MID_clearResult != NULL);
143	}
144
145
146	/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147	initialise this and set the pointer
148	*/
149	JNIEXPORT jboolean JNICALL
150	Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151	{
152	/* Allocate a MGWrapperData object to store query parameters */
153	MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154	assert(data != NULL);
155
156	/* Set default values - no stemming, no case-folding, boolean OR queries */
157	data->defaultStemMethod = 0;
158	data->defaultBoolCombine = 0;
159
160	/* Allocate a QueryInfo object to store more query parameters */
161	data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162	assert(data->queryInfo != NULL);
163
164	/* Set default values - 50 documents max, return term freqs, sort by rank */
165	data->queryInfo->index = NULL;
166	data->queryInfo->maxDocs = 50;
167	data->queryInfo->needTermFreqs = 1;
168
169	/* Save the object on the Java side */
170	(*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172	/* Initialise MG environment variables */
173	InitEnv();
174	SetEnv("expert", "true", NULL);
175	SetEnv("mode", "docnums", NULL);
176	return 1; /* true - no errors */
177	}
178
179
180	/*******************************************
181	Index caching
182	*******************************************/
183
184	query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187	/* Get the index data necessary to perform a query or document retrieval */
188	query_data*
189	loadIndexData(char* base_dir, char* index_path, char* text_path)
190	{
191	char* index_path_name;
192	char* text_path_name;
193	query_data* qd;
194	int i = 0;
195
196	/* Form the path name of the desired indexes */
197	index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198	assert(index_path_name != NULL);
199	strcpy(index_path_name, base_dir);
200	strcat(index_path_name, index_path);
201	printf("Index pathname: %s\n", index_path_name);
202
203	text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204	assert(text_path_name != NULL);
205	strcpy(text_path_name, base_dir);
206	strcat(text_path_name, text_path);
207	printf("Text pathname: %s\n", text_path_name);
208
209	/* Search through the cached indexes for the desired one */
210	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211	printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212	printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214	/* Check if the index has already been loaded */
215	if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216	(strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217	/* Index has already been loaded and cached, so return it */
218	printf("Found index!\n");
219	free(index_path_name);
220	free(text_path_name);
221	return cached_indexes[i];
222	}
223
224	i++;
225	}
226
227	/* Text strings no longer needed */
228	free(index_path_name);
229	free(text_path_name);
230
231	/* The index is not cached, so load it now */
232	qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233	if (!qd) {
234	printf("Error: Could not InitQuerySystem()...\n");
235	return NULL;
236	}
237
238	/* The index loaded OK, so cache it */
239	/* This could be a little more sophisticated, eg. replace least frequently used index */
240	if (i >= MAX_INDEXES_CACHED)
241	i = MAX_INDEXES_CACHED - 1;
242
243	/* Free the index being replaced */
244	if (cached_indexes[i] != NULL)
245	FinishQuerySystem(cached_indexes[i]);
246
247	/* Cache the loaded index, and return it */
248	cached_indexes[i] = qd;
249	return cached_indexes[i];
250	}
251
252
253	/* Clean up by unloading all cached indexes */
254	JNIEXPORT jboolean JNICALL
255	Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256	{
257	/* Free all the loaded indexes */
258	int i = 0;
259	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260	FinishQuerySystem(cached_indexes[i]);
261	cached_indexes[i] = NULL;
262	i++;
263	}
264
265	return 1; /* true - no errors */
266	}
267
268
269	/****************************************************
270	retrieve a document
271	****************************************************/
272
273	/* Returns a document from mg as a string */
274	JNIEXPORT jstring JNICALL
275	Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276	jstring j_base_dir, jstring j_text_path,
277	jlong j_docnum)
278	{
279	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281	char* index_path;
282	const char* base_dir;
283	const char* text_path;
284	query_data* qd;
285
286	u_long pos, len;
287	u_char* c_buffer = NULL;
288	u_char* uc_buffer = NULL;
289	int ULen;
290
291	jstring result;
292
293	/* Make sure an index has been specified */
294	index_path = data->queryInfo->index;
295	assert(index_path != NULL);
296
297	/* Obtain C versions of the two string parameters */
298	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299	if (base_dir == NULL) {
300	return NULL;
301	}
302	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303	if (text_path == NULL) {
304	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305	return NULL;
306	}
307
308	/* Load the appropriate index for satisfying this request */
309	printf("Document retrieval, index path: %s\n", index_path);
310	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
311
312	/* The C text strings are no longer needed */
313	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316	/* Check that the index was loaded successfully */
317	assert(qd != NULL);
318
319	/* Get the document position and length in the text file */
320	printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
321	FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
322	printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
323
324	/* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
325	c_buffer = (u_char*) malloc(len);
326	assert(c_buffer != NULL);
327	uc_buffer = (u_char) malloc((int) (qd->td->cth.ratio 1.01 * len) + 100);
328	assert(uc_buffer != NULL);
329
330	/* Seek to the correct position in the file and read the document text */
331	Fseek (qd->td->TextFile, pos, 0);
332	Fread (c_buffer, 1, len, qd->td->TextFile);
333
334	/* Decompress the document text into another buffer, and terminate it */
335	DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
336	uc_buffer[ULen] = '\0';
337
338	/* Load the document text into a Java string */
339	result = (*j_env)->NewStringUTF(j_env, uc_buffer);
340	assert(result != NULL);
341
342	/* Free C buffers */
343	free(c_buffer);
344	free(uc_buffer);
345
346	/* Return the document text */
347	return result;
348	}
349
350
351	/*******************************************
352	do a query
353	*******************************************/
354
355	/* do the actual query - the results are written to query_result held on the Java side */
356	JNIEXPORT void JNICALL
357	Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
358	jstring j_base_dir, jstring j_text_path,
359	jstring j_query)
360	{
361	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
362
363	char* index_path;
364	const char* base_dir;
365	const char* text_path;
366	query_data* qd;
367
368	jobject result_ptr;
369	char* query;
370	int i, j;
371
372	jthrowable exc;
373
374	/* Make sure an index has been specified */
375	index_path = data->queryInfo->index;
376	assert(index_path != NULL);
377
378	/* Obtain C versions of the two string parameters */
379	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
380	if (base_dir == NULL) {
381	return;
382	}
383	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
384	if (text_path == NULL) {
385	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
386	return;
387	}
388
389	/* Load the appropriate index for satisfying this request */
390	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
391
392	/* The C text strings are no longer needed */
393	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
394	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
395
396	/* Check that the index was loaded successfully */
397	assert(qd != NULL);
398
399	/* Remove anything hanging around from last time */
400	FreeQueryDocs(qd);
401
402	/* The result to write to */
403	result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
404	assert(result_ptr != NULL);
405
406	/* Clear any previous query results */
407	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
408	exc = (*j_env)->ExceptionOccurred(j_env);
409	if (exc) {
410	(*j_env)->ExceptionDescribe(j_env);
411	return;
412	}
413
414	/* Obtain a C version of the query string */
415	query = (char) (j_env)->GetStringUTFChars(j_env, j_query, NULL);
416	assert(query != NULL);
417	printf("Searching for query \"%s\"...\n", query);
418
419	/* Make sure the query isn't empty */
420	if (strlen(query) == 0) {
421	printf("Warning: Empty query.\n");
422	return;
423	}
424
425	/* "Some" queries are done as ranked queries */
426	if (data->defaultBoolCombine == 0) {
427	RankedQueryInfo rqi;
428	rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
429	rqi.Exact = 1; /* Perform exact ranking */
430	rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
431	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
432	/* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
433	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
434	rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
435	}
436
437	rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
438	rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
439	/* rqi.MaxAccums = -1; / / Use as many accumulators as necessary - CRASHES with list */
440	rqi.MaxAccums = 100000;
441	rqi.MaxTerms = -1; /* Use all the query terms */
442	/* rqi.StopAtMaxAccum = 0;/ / Don't care (using as many accumulators as necessary) */
443	rqi.StopAtMaxAccum = 1;
444	rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
445	rqi.skip_dump = NULL; /* Don't dump skip information */
446
447	/* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
448	SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
449	SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
450
451	RankedQuery(qd, query, &rqi);
452	}
453	/* "All" queries are done as boolean queries */
454	else {
455	BooleanQueryInfo bqi;
456	bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
457
458	/* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
459	BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
460	}
461
462	/* Finished with the C query string */
463	(*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
464
465	/* Check the query was processed successfully */
466	assert(qd->DL != NULL);
467	assert(qd->QTL != NULL);
468	assert(qd->TL != NULL);
469
470	/* Record the total number of matching documents */
471	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
472	exc = (*j_env)->ExceptionOccurred(j_env);
473	if (exc) {
474	(*j_env)->ExceptionDescribe(j_env);
475	return;
476	}
477
478	/* Record the matching documents, but only the number requested */
479	printf("Number of matching documents: %d\n", qd->DL->num);
480
481	for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
482	int doc_num = qd->DL->DE[i].DocNum;
483	float doc_weight = qd->DL->DE[i].Weight;
484
485	#if defined(PARADOCNUM) \|\| defined(NZDL)
486	if (qd->id->ifh.InvfLevel == 3) {
487	/* pararaph level, need to convert to doc level*/
488	doc_num = GetDocNumFromParaNum(qd, doc_num);
489	}
490	#endif
491
492
493	/* Call the addDoc function (Java side) to record a matching document */
494	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
495	(jlong) doc_num, (jfloat) doc_weight);
496	exc = (*j_env)->ExceptionOccurred(j_env);
497	if (exc) {
498	(*j_env)->ExceptionDescribe(j_env);
499	return;
500	}
501	}
502
503	/* Record the term information, if desired */
504	if (data->queryInfo->needTermFreqs) {
505	/* The following code is a lot more complicated than it could be, but it is necessary
506	to compensate for an oddity in MG. */
507	unsigned char** stemmed_terms = malloc(sizeof(unsigned char) qd->TL->num);
508
509	printf("Number of terms: %d\n", qd->TL->num);
510	printf("Number of query terms: %d\n", qd->QTL->num);
511
512	/* Generate the stemmed form of each of the relevant terms */
513	for (i = 0; i < qd->TL->num; i++) {
514	u_char* raw_term = qd->TL->TE[i].Word;
515	unsigned int term_length = raw_term[0];
516
517	u_char* raw_stemmed_term = malloc(term_length + 1);
518	unsigned int stemmed_term_length;
519
520	/* Copy the term, and stem it */
521	for (j = 0; j <= term_length; j++)
522	raw_stemmed_term[j] = raw_term[j];
523	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
524
525	/* Allocate memory to store the stemmed term, and fill it */
526	stemmed_term_length = raw_stemmed_term[0];
527	stemmed_terms[i] = malloc(stemmed_term_length + 1);
528	assert(stemmed_terms[i] != NULL);
529	strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
530	stemmed_terms[i][stemmed_term_length] = '\0';
531	}
532
533	/* Record every query term, along with their equivalent terms */
534	for (i = 0; i < qd->QTL->num; i++) {
535	u_char* raw_query_term = qd->QTL->QTE[i].Term;
536	unsigned int query_term_length = raw_query_term[0];
537	unsigned char* query_term;
538	jstring j_query_term;
539
540	u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
541	unsigned int stemmed_query_term_length;
542	unsigned char* stemmed_query_term;
543
544	/* Allocate memory to store the query term, and fill it */
545	query_term = malloc(query_term_length + 1);
546	assert(query_term != NULL);
547	strncpy(query_term, &(raw_query_term[1]), query_term_length);
548	query_term[query_term_length] = '\0';
549
550	/* Allocate a new jstring for the query term */
551	j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
552	assert(j_query_term != NULL);
553
554	/* Call the addTerm function (Java side) to record the query term */
555	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
556	j_query_term, (jint) data->defaultStemMethod);
557	exc = (*j_env)->ExceptionOccurred(j_env);
558	if (exc) {
559	(*j_env)->ExceptionDescribe(j_env);
560	return;
561	}
562
563	/* Copy the query term, and stem it */
564	for (j = 0; j <= query_term_length; j++)
565	raw_stemmed_query_term[j] = raw_query_term[j];
566	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
567
568	/* Allocate memory to store the stemmed query term, and fill it */
569	stemmed_query_term_length = raw_stemmed_query_term[0];
570	stemmed_query_term = malloc(stemmed_query_term_length + 1);
571	assert(stemmed_query_term != NULL);
572	strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
573	stemmed_query_term[stemmed_query_term_length] = '\0';
574
575	/* Find all the terms equivalent to the query term */
576	for (j = 0; j < qd->TL->num; j++) {
577	/* Check if the stemmed query term matches the stemmed term */
578	if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
579	u_char* raw_term = qd->TL->TE[j].Word;
580	unsigned int term_length = raw_term[0];
581	unsigned char* term;
582	jstring j_term;
583
584	/* Allocate memory to store the query term, and fill it */
585	term = malloc(term_length + 1);
586	assert(term != NULL);
587	strncpy(term, &(raw_term[1]), term_length);
588	term[term_length] = '\0';
589
590	/* Allocate a new jstring for the query term */
591	j_term = (*j_env)->NewStringUTF(j_env, term);
592	assert(j_term != NULL);
593
594	/* Call the addEquivTerm function (Java side) to record the equivalent term */
595	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
596	j_query_term, j_term,
597	(jlong) qd->TL->TE[j].WE.doc_count,
598	(jlong) qd->TL->TE[j].WE.count);
599	exc = (*j_env)->ExceptionOccurred(j_env);
600	if (exc) {
601	(*j_env)->ExceptionDescribe(j_env);
602	return;
603	}
604	}
605	}
606	}
607	}
608	}
609
610
611	/*******************************************
612	set query options
613	*******************************************/
614
615	/* Turn casefolding on or off */
616	JNIEXPORT void JNICALL
617	Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
618	jboolean j_on)
619	{
620	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
621
622	if (j_on) {
623	data->defaultStemMethod \|= 1;
624	} else {
625	data->defaultStemMethod &= 0xe;
626	}
627	}
628
629
630	/* Turn stemming on or off */
631	JNIEXPORT void JNICALL
632	Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
633	jboolean j_on)
634	{
635	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
636
637	if (j_on) {
638	data->defaultStemMethod \|= 2;
639	} else {
640	data->defaultStemMethod &= 0xd;
641	}
642	}
643
644
645	/* Set the maximum number of documents to return from a query */
646	JNIEXPORT void JNICALL
647	Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
648	jint j_max)
649	{
650	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
651	data->queryInfo->maxDocs = j_max;
652	}
653
654
655	/* Turn term frequency recording on or off */
656	JNIEXPORT void JNICALL
657	Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
658	jboolean j_on)
659	{
660	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
661	data->queryInfo->needTermFreqs = j_on;
662	}
663
664
665	/* Choose MG index to search */
666	JNIEXPORT void JNICALL
667	Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
668	jstring j_index)
669	{
670	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
671
672	/* Get the index name as a C string */
673	const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
674	assert(index != NULL);
675	printf("Choosing index %s...\n", index);
676
677	/* Free the previous index name */
678	if (data->queryInfo->index)
679	free(data->queryInfo->index);
680
681	/* Allocate memory for the index name, and fill it */
682	data->queryInfo->index = (char*) malloc(strlen(index) + 1);
683	assert(data->queryInfo->index != NULL);
684	strcpy(data->queryInfo->index, index);
685
686	/* Release the index string */
687	(*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
688	}
689
690
691	/* Choose boolean AND or boolean OR queries */
692	JNIEXPORT void JNICALL
693	Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
694	jint j_mode)
695	{
696	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
697	data->defaultBoolCombine = j_mode;
698	}
699
700
701	/* Get a text representation of the current parameter values */
702	JNIEXPORT jstring JNICALL
703	Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
704	{
705	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
706	char result[512]; /* Assume this is big enough */
707
708	/* Print the data to a character array */
709	sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
710	(data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
711	(data->defaultStemMethod & 1),
712	(data->defaultStemMethod & 2),
713	(data->defaultBoolCombine == 1 ? "all" : "some"),
714	(data->queryInfo->maxDocs));
715
716	/* Convert to a jstring, and return */
717	return (*j_env)->NewStringUTF(j_env, result);
718	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: