Context Navigation

source: trunk/indexers/mg/jni/MGWrapperImpl.c@ 11021

Last change on this file since 11021 was 11021, checked in by kjdon, 18 years ago
asserts if they fail kill tomcat, so changed some asserts to if statements
Property svn:keywords set to `Author Date Id Revision`
File size: 24.3 KB

Line
1	/*
2	* MGWrapperImpl.c
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19
20
#include "MGWrapperImpl.h"
#include <stdint.h>
#include <jni.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30	/*************************************************************************
31	NOTES
32
33	- Features supported by MGPP but not by MG (AFAIK)
34
35	- Sorting results by rank.
36	Done here as a post-processing operation. Could be more efficient in
37	some cases: the current solution is not very good when the number of
38	matching documents is large and the number of desired matches is
39	small. In this case it would be better to iterate through the array
40	picking out the best documents rather than sorting them all.
41
42	- Asking for query term frequencies to be returned.
43	This cannot be turned off in MG. If the term frequencies are not
44	required, they are simply not passed back to the Java side.
45
46	- Choosing the index for queries.
47	It is possible for MG to build Section and Document indexes (for
48	example), but these are two separate indexes, and must be loaded
49	separately. This module can load more than one index at a time, thus
50	processing queries to different indexes more quickly.
51	NOTE: This replaces TWO options in the MGPP version: returnLevel and
52	queryLevel.
53
54	*************************************************************************/
55
56
/* Number of slots in the cached_indexes[] table of loaded indexes */
#define MAX_INDEXES_CACHED (3)
58
59	/* copied from mgquery, needed to convert paragraph numbers to document numbers
60	for greenstone */
#if defined(PARADOCNUM) || defined(NZDL)
/*
 * Convert a paragraph number into a document number.
 *
 * Binary-searches qd->paragraph for the document number d in
 * 1..num_of_docs that satisfies
 *     Paragraph[d-1] < paranum <= Paragraph[d]
 * and returns d.
 * (Presumably Paragraph[d] is the number of the last paragraph of
 * document d - confirm against mgquery, where this was copied from.)
 *
 * If the search leaves the range 1..num_of_docs without a match,
 * FatalError() is called and 0 is returned.
 */
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;
  int mid;

  /* mid is recomputed from the current bounds on every pass */
  while ((mid = (low + high) / 2) >= 1 && mid <= Documents)
    {
      if (paranum > Paragraph[mid])
        low = mid + 1;          /* target lies in a later document */
      else if (paranum <= Paragraph[mid - 1])
        high = mid - 1;         /* target lies in an earlier document */
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
81
82
83	/*********************************************
84	initialisation stuff
85	*********************************************/
86
87	/* cached ids for java stuff */
88	jfieldID FID_mg_data = NULL; /* MGWrapperData */
89	jfieldID FID_query_result = NULL; /* MGQueryResult */
90	jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91	jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92	jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93	jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94	jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97	/* to access objects and methods on java side, need their field/method ids -
98	this initialises them at the start to avoid recalculating them each time they
99	are needed
100	Note: the descriptors need to be exactly right, otherwise you get an error
101	saying "no such field" but no reference to the fact that it has the right
102	name but the wrong type.
103	Note: apparently the jclass is a local ref and should only work
104	in the method that created it. It seems to work ok, but I'll make it
105	global cos the book said I should, and it may avoid future hassles.
106	*/
107	JNIEXPORT void JNICALL
108	Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109	{
110	jclass JC_MGQueryResult;
111
112	/* a long-"J" */
113	FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114	assert(FID_mg_data != NULL);
115
116	/* an object -"L<class name>;" */
117	FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118	"Lorg/greenstone/mg/MGQueryResult;");
119	assert(FID_query_result != NULL);
120
121	/* the methods we want to use */
122	JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124	/* addDoc(long doc, float rank) */
125	MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126	assert(MID_addDoc != NULL);
127
128	/* addTerm(String term, int stem) */
129	MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130	assert(MID_addTerm != NULL);
131
132	/* addEquivTerm(String term, String equivTerm, long match, long freq) */
133	MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134	assert(MID_addEquivTerm != NULL);
135
136	/* setTotalDocs(long) */
137	MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138	assert(MID_setTotalDocs != NULL);
139
140	/* clear(void) */
141	MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142	assert(MID_clearResult != NULL);
143	}
144
145
146	/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147	initialise this and set the pointer
148	*/
149	JNIEXPORT jboolean JNICALL
150	Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151	{
152	/* Allocate a MGWrapperData object to store query parameters */
153	MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154	assert(data != NULL);
155
156	/* Set default values - no stemming, no case-folding, boolean OR queries */
157	data->defaultStemMethod = 0;
158	data->defaultBoolCombine = 0;
159
160	/* Allocate a QueryInfo object to store more query parameters */
161	data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162	assert(data->queryInfo != NULL);
163
164	/* Set default values - 50 documents max, return term freqs, sort by rank */
165	data->queryInfo->index = NULL;
166	data->queryInfo->maxDocs = 50;
167	data->queryInfo->needTermFreqs = 1;
168
169	/* Save the object on the Java side */
170	(*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172	/* Initialise MG environment variables */
173	InitEnv();
174	SetEnv("expert", "true", NULL);
175	SetEnv("mode", "docnums", NULL);
176	return 1; /* true - no errors */
177	}
178
179
180	/*******************************************
181	Index caching
182	*******************************************/
183
184	query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187	/* Get the index data necessary to perform a query or document retrieval */
188	query_data*
189	loadIndexData(char* base_dir, char* index_path, char* text_path)
190	{
191	char* index_path_name;
192	char* text_path_name;
193	query_data* qd;
194	int i = 0;
195
196	/* Form the path name of the desired indexes */
197	index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198	assert(index_path_name != NULL);
199	strcpy(index_path_name, base_dir);
200	strcat(index_path_name, index_path);
201	printf("Index pathname: %s\n", index_path_name);
202
203	text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204	assert(text_path_name != NULL);
205	strcpy(text_path_name, base_dir);
206	strcat(text_path_name, text_path);
207	printf("Text pathname: %s\n", text_path_name);
208
209	/* Search through the cached indexes for the desired one */
210	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211	printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212	printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214	/* Check if the index has already been loaded */
215	if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216	(strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217	/* Index has already been loaded and cached, so return it */
218	printf("Found index!\n");
219	free(index_path_name);
220	free(text_path_name);
221	return cached_indexes[i];
222	}
223
224	i++;
225	}
226
227	/* Text strings no longer needed */
228	free(index_path_name);
229	free(text_path_name);
230
231	/* The index is not cached, so load it now */
232	qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233	if (!qd) {
234	printf("Error: Could not InitQuerySystem()...\n");
235	return NULL;
236	}
237
238	/* The index loaded OK, so cache it */
239	/* This could be a little more sophisticated, eg. replace least frequently used index */
240	if (i >= MAX_INDEXES_CACHED)
241	i = MAX_INDEXES_CACHED - 1;
242
243	/* Free the index being replaced */
244	if (cached_indexes[i] != NULL)
245	FinishQuerySystem(cached_indexes[i]);
246
247	/* Cache the loaded index, and return it */
248	cached_indexes[i] = qd;
249	return cached_indexes[i];
250	}
251
252
253	/* Clean up by unloading all cached indexes */
254	JNIEXPORT jboolean JNICALL
255	Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256	{
257	/* Free all the loaded indexes */
258	int i = 0;
259	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260	FinishQuerySystem(cached_indexes[i]);
261	cached_indexes[i] = NULL;
262	i++;
263	}
264
265	return 1; /* true - no errors */
266	}
267
268
269	/****************************************************
270	retrieve a document
271	****************************************************/
272
273	/* Returns a document from mg as a string */
274	JNIEXPORT jstring JNICALL
275	Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276	jstring j_base_dir, jstring j_text_path,
277	jlong j_docnum)
278	{
279	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281	char* index_path;
282	const char* base_dir;
283	const char* text_path;
284	query_data* qd;
285
286	u_long pos, len;
287	u_char* c_buffer = NULL;
288	u_char* uc_buffer = NULL;
289	int ULen;
290
291	jstring result;
292
293	/* Make sure an index has been specified */
294	index_path = data->queryInfo->index;
295	assert(index_path != NULL);
296
297	/* Obtain C versions of the two string parameters */
298	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299	if (base_dir == NULL) {
300	return NULL;
301	}
302	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303	if (text_path == NULL) {
304	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305	return NULL;
306	}
307
308	/* Load the appropriate index for satisfying this request */
309	printf("Document retrieval, index path: %s\n", index_path);
310	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
311
312	/* The C text strings are no longer needed */
313	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316	/* Check that the index was loaded successfully */
317	if (qd==NULL) {
318	return NULL;
319	}
320	/assert(qd != NULL);/
321
322	/* Get the document position and length in the text file */
323	printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
324	FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
325	printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
326
327	/* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
328	c_buffer = (u_char*) malloc(len);
329	assert(c_buffer != NULL);
330	uc_buffer = (u_char) malloc((int) (qd->td->cth.ratio 1.01 * len) + 100);
331	assert(uc_buffer != NULL);
332
333	/* Seek to the correct position in the file and read the document text */
334	Fseek (qd->td->TextFile, pos, 0);
335	Fread (c_buffer, 1, len, qd->td->TextFile);
336
337	/* Decompress the document text into another buffer, and terminate it */
338	DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
339	uc_buffer[ULen] = '\0';
340
341	/* Load the document text into a Java string */
342	result = (*j_env)->NewStringUTF(j_env, uc_buffer);
343	assert(result != NULL);
344
345	/* Free C buffers */
346	free(c_buffer);
347	free(uc_buffer);
348
349	/* Return the document text */
350	return result;
351	}
352
353
354	/*******************************************
355	do a query
356	*******************************************/
357
358	/* do the actual query - the results are written to query_result held on the Java side */
359	JNIEXPORT void JNICALL
360	Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
361	jstring j_base_dir, jstring j_text_path,
362	jstring j_query)
363	{
364	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
365
366	char* index_path;
367	const char* base_dir;
368	const char* text_path;
369	query_data* qd;
370
371	jobject result_ptr;
372	char* query;
373	int i, j;
374
375	jthrowable exc;
376	/* First of all, clear the previous result */
377	/* The result to write to */
378	result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
379	assert(result_ptr != NULL);
380
381	/* Clear any previous query results */
382	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
383	exc = (*j_env)->ExceptionOccurred(j_env);
384	if (exc) {
385	(*j_env)->ExceptionDescribe(j_env);
386	return;
387	}
388
389	/* Make sure an index has been specified */
390	index_path = data->queryInfo->index;
391	if (index_path == NULL) {
392	return;
393	}
394
395	/* Obtain C versions of the two string parameters */
396	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
397	if (base_dir == NULL) {
398	return;
399	}
400	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
401	if (text_path == NULL) {
402	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
403	return;
404	}
405
406	/* Load the appropriate index for satisfying this request */
407	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
408
409	/* The C text strings are no longer needed */
410	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
411	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
412
413	/* Check that the index was loaded successfully */
414	if (qd == NULL) {
415	return;
416	}
417
418	/* Remove anything hanging around from last time */
419	FreeQueryDocs(qd);
420
421	/* Obtain a C version of the query string */
422	query = (char) (j_env)->GetStringUTFChars(j_env, j_query, NULL);
423	if (query == NULL) {
424	return;
425	}
426	printf("Searching for query \"%s\"...\n", query);
427
428	/* Make sure the query isn't empty */
429	if (strlen(query) == 0) {
430	printf("Warning: Empty query.\n");
431	return;
432	}
433
434	/* "Some" queries are done as ranked queries */
435	if (data->defaultBoolCombine == 0) {
436	RankedQueryInfo rqi;
437	rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
438	rqi.Exact = 1; /* Perform exact ranking */
439	rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
440	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
441	/* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
442	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
443	rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
444	}
445
446	rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
447	rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
448	/* rqi.MaxAccums = -1; / / Use as many accumulators as necessary - CRASHES with list */
449	rqi.MaxAccums = 100000;
450	rqi.MaxTerms = -1; /* Use all the query terms */
451	/* rqi.StopAtMaxAccum = 0;/ / Don't care (using as many accumulators as necessary) */
452	rqi.StopAtMaxAccum = 1;
453	rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
454	rqi.skip_dump = NULL; /* Don't dump skip information */
455
456	/* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
457	SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
458	SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
459
460	RankedQuery(qd, query, &rqi);
461	}
462	/* "All" queries are done as boolean queries */
463	else {
464	BooleanQueryInfo bqi;
465	bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
466
467	/* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
468	BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
469	}
470
471	/* Finished with the C query string */
472	(*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
473
474	/* Check the query was processed successfully */
475	if (qd->DL == NULL \|\| qd->QTL == NULL \|\| qd->TL == NULL) {
476	return;
477	}
478
479	/* Record the total number of matching documents */
480	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
481	exc = (*j_env)->ExceptionOccurred(j_env);
482	if (exc) {
483	(*j_env)->ExceptionDescribe(j_env);
484	return;
485	}
486
487	/* Record the matching documents, but only the number requested */
488	printf("Number of matching documents: %d\n", qd->DL->num);
489
490	for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
491	int doc_num = qd->DL->DE[i].DocNum;
492	float doc_weight = qd->DL->DE[i].Weight;
493
494	#if defined(PARADOCNUM) \|\| defined(NZDL)
495	if (qd->id->ifh.InvfLevel == 3) {
496	/* pararaph level, need to convert to doc level*/
497	doc_num = GetDocNumFromParaNum(qd, doc_num);
498	}
499	#endif
500
501
502	/* Call the addDoc function (Java side) to record a matching document */
503	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
504	(jlong) doc_num, (jfloat) doc_weight);
505	exc = (*j_env)->ExceptionOccurred(j_env);
506	if (exc) {
507	(*j_env)->ExceptionDescribe(j_env);
508	return;
509	}
510	}
511
512	/* Record the term information, if desired */
513	if (data->queryInfo->needTermFreqs) {
514	/* The following code is a lot more complicated than it could be, but it is necessary
515	to compensate for an oddity in MG. */
516	unsigned char** stemmed_terms = malloc(sizeof(unsigned char) qd->TL->num);
517
518	printf("Number of terms: %d\n", qd->TL->num);
519	printf("Number of query terms: %d\n", qd->QTL->num);
520
521	/* Generate the stemmed form of each of the relevant terms */
522	for (i = 0; i < qd->TL->num; i++) {
523	u_char* raw_term = qd->TL->TE[i].Word;
524	unsigned int term_length = raw_term[0];
525
526	u_char* raw_stemmed_term = malloc(term_length + 1);
527	unsigned int stemmed_term_length;
528
529	/* Copy the term, and stem it */
530	for (j = 0; j <= term_length; j++)
531	raw_stemmed_term[j] = raw_term[j];
532	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
533
534	/* Allocate memory to store the stemmed term, and fill it */
535	stemmed_term_length = raw_stemmed_term[0];
536	stemmed_terms[i] = malloc(stemmed_term_length + 1);
537	assert(stemmed_terms[i] != NULL);
538	strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
539	stemmed_terms[i][stemmed_term_length] = '\0';
540	}
541
542	/* Record every query term, along with their equivalent terms */
543	for (i = 0; i < qd->QTL->num; i++) {
544	u_char* raw_query_term = qd->QTL->QTE[i].Term;
545	unsigned int query_term_length = raw_query_term[0];
546	unsigned char* query_term;
547	jstring j_query_term;
548
549	u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
550	unsigned int stemmed_query_term_length;
551	unsigned char* stemmed_query_term;
552
553	/* Allocate memory to store the query term, and fill it */
554	query_term = malloc(query_term_length + 1);
555	assert(query_term != NULL);
556	strncpy(query_term, &(raw_query_term[1]), query_term_length);
557	query_term[query_term_length] = '\0';
558
559	/* Allocate a new jstring for the query term */
560	j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
561	assert(j_query_term != NULL);
562
563	/* Call the addTerm function (Java side) to record the query term */
564	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
565	j_query_term, (jint) data->defaultStemMethod);
566	exc = (*j_env)->ExceptionOccurred(j_env);
567	if (exc) {
568	(*j_env)->ExceptionDescribe(j_env);
569	return;
570	}
571
572	/* Copy the query term, and stem it */
573	for (j = 0; j <= query_term_length; j++)
574	raw_stemmed_query_term[j] = raw_query_term[j];
575	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
576
577	/* Allocate memory to store the stemmed query term, and fill it */
578	stemmed_query_term_length = raw_stemmed_query_term[0];
579	stemmed_query_term = malloc(stemmed_query_term_length + 1);
580	assert(stemmed_query_term != NULL);
581	strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
582	stemmed_query_term[stemmed_query_term_length] = '\0';
583
584	/* Find all the terms equivalent to the query term */
585	for (j = 0; j < qd->TL->num; j++) {
586	/* Check if the stemmed query term matches the stemmed term */
587	if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
588	u_char* raw_term = qd->TL->TE[j].Word;
589	unsigned int term_length = raw_term[0];
590	unsigned char* term;
591	jstring j_term;
592
593	/* Allocate memory to store the query term, and fill it */
594	term = malloc(term_length + 1);
595	assert(term != NULL);
596	strncpy(term, &(raw_term[1]), term_length);
597	term[term_length] = '\0';
598
599	/* Allocate a new jstring for the query term */
600	j_term = (*j_env)->NewStringUTF(j_env, term);
601	assert(j_term != NULL);
602
603	/* Call the addEquivTerm function (Java side) to record the equivalent term */
604	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
605	j_query_term, j_term,
606	(jlong) qd->TL->TE[j].WE.doc_count,
607	(jlong) qd->TL->TE[j].WE.count);
608	exc = (*j_env)->ExceptionOccurred(j_env);
609	if (exc) {
610	(*j_env)->ExceptionDescribe(j_env);
611	return;
612	}
613	}
614	}
615	}
616	}
617	}
618
619
620	/*******************************************
621	set query options
622	*******************************************/
623
624	/* Turn casefolding on or off */
625	JNIEXPORT void JNICALL
626	Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
627	jboolean j_on)
628	{
629	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
630
631	if (j_on) {
632	data->defaultStemMethod \|= 1;
633	} else {
634	data->defaultStemMethod &= 0xe;
635	}
636	}
637
638
639	/* Turn stemming on or off */
640	JNIEXPORT void JNICALL
641	Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
642	jboolean j_on)
643	{
644	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
645
646	if (j_on) {
647	data->defaultStemMethod \|= 2;
648	} else {
649	data->defaultStemMethod &= 0xd;
650	}
651	}
652
653
654	/* Set the maximum number of documents to return from a query */
655	JNIEXPORT void JNICALL
656	Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
657	jint j_max)
658	{
659	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
660	data->queryInfo->maxDocs = j_max;
661	}
662
663
664	/* Turn term frequency recording on or off */
665	JNIEXPORT void JNICALL
666	Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
667	jboolean j_on)
668	{
669	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
670	data->queryInfo->needTermFreqs = j_on;
671	}
672
673
674	/* Choose MG index to search */
675	JNIEXPORT void JNICALL
676	Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
677	jstring j_index)
678	{
679	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
680
681	/* Get the index name as a C string */
682	const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
683	assert(index != NULL);
684	printf("Choosing index %s...\n", index);
685
686	/* Free the previous index name */
687	if (data->queryInfo->index)
688	free(data->queryInfo->index);
689
690	/* Allocate memory for the index name, and fill it */
691	data->queryInfo->index = (char*) malloc(strlen(index) + 1);
692	assert(data->queryInfo->index != NULL);
693	strcpy(data->queryInfo->index, index);
694
695	/* Release the index string */
696	(*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
697	}
698
699
700	/* Choose boolean AND or boolean OR queries */
701	JNIEXPORT void JNICALL
702	Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
703	jint j_mode)
704	{
705	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
706	data->defaultBoolCombine = j_mode;
707	}
708
709
710	/* Get a text representation of the current parameter values */
711	JNIEXPORT jstring JNICALL
712	Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
713	{
714	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
715	char result[512]; /* Assume this is big enough */
716
717	/* Print the data to a character array */
718	sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
719	(data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
720	(data->defaultStemMethod & 1),
721	(data->defaultStemMethod & 2),
722	(data->defaultBoolCombine == 1 ? "all" : "some"),
723	(data->queryInfo->maxDocs));
724
725	/* Convert to a jstring, and return */
726	return (*j_env)->NewStringUTF(j_env, result);
727	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: