Context Navigation

source: trunk/gsdl3/packages/mg/jni/MGWrapperImpl.c@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago
merged from branch ant-install-branch: merge 1
Property svn:keywords set to `Author Date Id Revision`
File size: 24.3 KB

Line
1	/*
2	* MGWrapperImpl.c
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19
20
#include "MGWrapperImpl.h"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <jni.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
29
30	/*************************************************************************
31	NOTES
32
33	- Features supported by MGPP but not by MG (AFAIK)
34
35	- Sorting results by rank.
36	Done here as a post-processing operation. Could be more efficient in
37	some cases: the current solution is not very good when the number of
38	matching documents is large and the number of desired matches is
39	small. In this case it would be better to iterate through the array
40	picking out the best documents rather than sorting them all.
41
42	- Asking for query term frequencies to be returned.
43	This cannot be turned off in MG. If the term frequencies are not
44	required, they are simply not passed back to the Java side.
45
46	- Choosing the index for queries.
47	It is possible for MG to build Section and Document indexes (for
48	example), but these are two separate indexes, and must be loaded
49	separately. This module can load more than one index at a time, thus
50	processing queries to different indexes more quickly.
51	NOTE: This replaces TWO options in the MGPP version: returnLevel and
52	queryLevel.
53
54	*************************************************************************/
55
56
/* Capacity of the cached_indexes[] table: how many loaded MG indexes are
   kept open at the same time. */
#define MAX_INDEXES_CACHED 3
58
/* copied from mgquery, needed to convert paragraph numbers to document numbers
   for greenstone */
#if defined(PARADOCNUM) || defined(NZDL)
/*
 * Binary search of qd->paragraph for the document that holds paragraph
 * `paranum`.
 *
 * Returns the 1-based document number `d` for which
 *     Paragraph[d-1] < paranum <= Paragraph[d].
 * If no such document exists, FatalError() is called and 0 is returned
 * (should FatalError() return).
 *
 * The search is only meaningful if qd->paragraph is non-decreasing over
 * indexes 0..num_of_docs -- NOTE(review): presumably guaranteed by MG;
 * confirm against the code that fills qd->paragraph.
 *
 * The loop is bounded by `low <= high`.  The previous condition only tested
 * that `mid` was within 1..Documents, which never becomes false when
 * paranum > Paragraph[Documents] (low sticks at Documents+1, high at
 * Documents, mid at Documents), so the error path could not be reached.
 */
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  while (low <= high)
    {
      /* written this way so that low+high cannot overflow */
      int mid = low + (high - low) / 2;

      if (paranum > Paragraph[mid])
        low = mid + 1;
      else if (paranum <= Paragraph[mid - 1])
        high = mid - 1;
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
81
82
83	/*********************************************
84	initialisation stuff
85	*********************************************/
86
87	/* cached ids for java stuff */
88	jfieldID FID_mg_data = NULL; /* MGWrapperData */
89	jfieldID FID_query_result = NULL; /* MGQueryResult */
90	jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
91	jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
92	jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
93	jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
94	jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
95
96
97	/* to access objects and methods on java side, need their field/method ids -
98	this initialises them at the start to avoid recalculating them each time they
99	are needed
100	Note: the descriptors need to be exactly right, otherwise you get an error
101	saying "no such field" but no reference to the fact that it has the right
102	name but the wrong type.
103	Note: apparently the jclass is a local ref and should only work
104	in the method that created it. It seems to work ok, but I'll make it
105	global cos the book said I should, and it may avoid future hassles.
106	*/
107	JNIEXPORT void JNICALL
108	Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
109	{
110	jclass JC_MGQueryResult;
111
112	/* a long-"J" */
113	FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
114	assert(FID_mg_data != NULL);
115
116	/* an object -"L<class name>;" */
117	FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
118	"Lorg/greenstone/mg/MGQueryResult;");
119	assert(FID_query_result != NULL);
120
121	/* the methods we want to use */
122	JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
123
124	/* addDoc(long doc, float rank) */
125	MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
126	assert(MID_addDoc != NULL);
127
128	/* addTerm(String term, int stem) */
129	MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
130	assert(MID_addTerm != NULL);
131
132	/* addEquivTerm(String term, String equivTerm, long match, long freq) */
133	MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
134	assert(MID_addEquivTerm != NULL);
135
136	/* setTotalDocs(long) */
137	MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
138	assert(MID_setTotalDocs != NULL);
139
140	/* clear(void) */
141	MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
142	assert(MID_clearResult != NULL);
143	}
144
145
146	/* the java side MGWrapper has a pointer to a C object - MGWrapperData
147	initialise this and set the pointer
148	*/
149	JNIEXPORT jboolean JNICALL
150	Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
151	{
152	/* Allocate a MGWrapperData object to store query parameters */
153	MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
154	assert(data != NULL);
155
156	/* Set default values - no stemming, no case-folding, boolean OR queries */
157	data->defaultStemMethod = 0;
158	data->defaultBoolCombine = 0;
159
160	/* Allocate a QueryInfo object to store more query parameters */
161	data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
162	assert(data->queryInfo != NULL);
163
164	/* Set default values - 50 documents max, return term freqs, sort by rank */
165	data->queryInfo->index = NULL;
166	data->queryInfo->maxDocs = 50;
167	data->queryInfo->needTermFreqs = 1;
168
169	/* Save the object on the Java side */
170	(*j_env)->SetIntField(j_env, j_obj, FID_mg_data, (long) data);
171
172	/* Initialise MG environment variables */
173	InitEnv();
174	SetEnv("expert", "true", NULL);
175	SetEnv("mode", "docnums", NULL);
176	return 1; /* true - no errors */
177	}
178
179
180	/*******************************************
181	Index caching
182	*******************************************/
183
184	query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
185
186
187	/* Get the index data necessary to perform a query or document retrieval */
188	query_data*
189	loadIndexData(char* base_dir, char* index_path, char* text_path)
190	{
191	char* index_path_name;
192	char* text_path_name;
193	query_data* qd;
194	int i = 0;
195
196	/* Form the path name of the desired indexes */
197	index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
198	assert(index_path_name != NULL);
199	strcpy(index_path_name, base_dir);
200	strcat(index_path_name, index_path);
201	printf("Index pathname: %s\n", index_path_name);
202
203	text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
204	assert(text_path_name != NULL);
205	strcpy(text_path_name, base_dir);
206	strcat(text_path_name, text_path);
207	printf("Text pathname: %s\n", text_path_name);
208
209	/* Search through the cached indexes for the desired one */
210	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
211	printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
212	printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
213
214	/* Check if the index has already been loaded */
215	if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
216	(strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
217	/* Index has already been loaded and cached, so return it */
218	printf("Found index!\n");
219	free(index_path_name);
220	free(text_path_name);
221	return cached_indexes[i];
222	}
223
224	i++;
225	}
226
227	/* Text strings no longer needed */
228	free(index_path_name);
229	free(text_path_name);
230
231	/* The index is not cached, so load it now */
232	qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
233	if (!qd) {
234	printf("Error: Could not InitQuerySystem()...\n");
235	return NULL;
236	}
237
238	/* The index loaded OK, so cache it */
239	/* This could be a little more sophisticated, eg. replace least frequently used index */
240	if (i >= MAX_INDEXES_CACHED)
241	i = MAX_INDEXES_CACHED - 1;
242
243	/* Free the index being replaced */
244	if (cached_indexes[i] != NULL)
245	FinishQuerySystem(cached_indexes[i]);
246
247	/* Cache the loaded index, and return it */
248	cached_indexes[i] = qd;
249	return cached_indexes[i];
250	}
251
252
253	/* Clean up by unloading all cached indexes */
254	JNIEXPORT jboolean JNICALL
255	Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
256	{
257	/* Free all the loaded indexes */
258	int i = 0;
259	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
260	FinishQuerySystem(cached_indexes[i]);
261	cached_indexes[i] = NULL;
262	i++;
263	}
264
265	return 1; /* true - no errors */
266	}
267
268
269	/****************************************************
270	retrieve a document
271	****************************************************/
272
273	/* Returns a document from mg as a string */
274	JNIEXPORT jstring JNICALL
275	Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
276	jstring j_base_dir, jstring j_text_path,
277	jlong j_docnum)
278	{
279	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
280
281	char* index_path;
282	const char* base_dir;
283	const char* text_path;
284	query_data* qd;
285
286	u_long pos, len;
287	u_char* c_buffer = NULL;
288	u_char* uc_buffer = NULL;
289	int ULen;
290
291	jstring result;
292
293	/* Make sure an index has been specified */
294	index_path = data->queryInfo->index;
295	assert(index_path != NULL);
296
297	/* Obtain C versions of the two string parameters */
298	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
299	if (base_dir == NULL) {
300	return NULL;
301	}
302	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
303	if (text_path == NULL) {
304	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
305	return NULL;
306	}
307
308	/* Load the appropriate index for satisfying this request */
309	printf("Document retrieval, index path: %s\n", index_path);
310	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
311
312	/* The C text strings are no longer needed */
313	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
314	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
315
316	/* Check that the index was loaded successfully */
317	if (qd==NULL) {
318	return NULL;
319	}
320	/assert(qd != NULL);/
321
322	/* Get the document position and length in the text file */
323	printf("Fetching document number %ld...\n", (unsigned long) j_docnum);
324	FetchDocStart(qd, (unsigned long) j_docnum, &pos, &len);
325	printf("Fetched document start. Pos: %ld, Len: %ld\n", pos, len);
326
327	/* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
328	c_buffer = (u_char*) malloc(len);
329	assert(c_buffer != NULL);
330	uc_buffer = (u_char) malloc((int) (qd->td->cth.ratio 1.01 * len) + 100);
331	assert(uc_buffer != NULL);
332
333	/* Seek to the correct position in the file and read the document text */
334	Fseek (qd->td->TextFile, pos, 0);
335	Fread (c_buffer, 1, len, qd->td->TextFile);
336
337	/* Decompress the document text into another buffer, and terminate it */
338	DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
339	uc_buffer[ULen] = '\0';
340
341	/* Load the document text into a Java string */
342	result = (*j_env)->NewStringUTF(j_env, uc_buffer);
343	assert(result != NULL);
344
345	/* Free C buffers */
346	free(c_buffer);
347	free(uc_buffer);
348
349	/* Return the document text */
350	return result;
351	}
352
353
354	/*******************************************
355	do a query
356	*******************************************/
357
358	/* do the actual query - the results are written to query_result held on the Java side */
359	JNIEXPORT void JNICALL
360	Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
361	jstring j_base_dir, jstring j_text_path,
362	jstring j_query)
363	{
364	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
365
366	char* index_path;
367	const char* base_dir;
368	const char* text_path;
369	query_data* qd;
370
371	jobject result_ptr;
372	char* query;
373	int i, j;
374
375	jthrowable exc;
376
377	/* First of all, clear the previous result */
378	/* The result to write to */
379	result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
380	assert(result_ptr != NULL);
381
382	/* Clear any previous query results */
383	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
384	exc = (*j_env)->ExceptionOccurred(j_env);
385	if (exc) {
386	(*j_env)->ExceptionDescribe(j_env);
387	return;
388	}
389
390	/* Make sure an index has been specified */
391	index_path = data->queryInfo->index;
392	assert(index_path != NULL);
393
394	/* Obtain C versions of the two string parameters */
395	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
396	if (base_dir == NULL) {
397	return;
398	}
399	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
400	if (text_path == NULL) {
401	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
402	return;
403	}
404
405	/* Load the appropriate index for satisfying this request */
406	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
407
408	/* The C text strings are no longer needed */
409	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
410	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
411
412	/* Check that the index was loaded successfully */
413	if (qd == NULL) {
414	return;
415	}
416	/assert(qd != NULL);/
417
418	/* Remove anything hanging around from last time */
419	FreeQueryDocs(qd);
420
421	/* Obtain a C version of the query string */
422	query = (char) (j_env)->GetStringUTFChars(j_env, j_query, NULL);
423	assert(query != NULL);
424	printf("Searching for query \"%s\"...\n", query);
425
426	/* Make sure the query isn't empty */
427	if (strlen(query) == 0) {
428	printf("Warning: Empty query.\n");
429	return;
430	}
431
432	/* "Some" queries are done as ranked queries */
433	if (data->defaultBoolCombine == 0) {
434	RankedQueryInfo rqi;
435	rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
436	rqi.Exact = 1; /* Perform exact ranking */
437	rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
438	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
439	/* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
440	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
441	rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
442	}
443
444	rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
445	rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
446	/* rqi.MaxAccums = -1; / / Use as many accumulators as necessary - CRASHES with list */
447	rqi.MaxAccums = 100000;
448	rqi.MaxTerms = -1; /* Use all the query terms */
449	/* rqi.StopAtMaxAccum = 0;/ / Don't care (using as many accumulators as necessary) */
450	rqi.StopAtMaxAccum = 1;
451	rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
452	rqi.skip_dump = NULL; /* Don't dump skip information */
453
454	/* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
455	SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
456	SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
457
458	RankedQuery(qd, query, &rqi);
459	}
460	/* "All" queries are done as boolean queries */
461	else {
462	BooleanQueryInfo bqi;
463	bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
464
465	/* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
466	BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
467	}
468
469	/* Finished with the C query string */
470	(*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
471
472	/* Check the query was processed successfully */
473	assert(qd->DL != NULL);
474	assert(qd->QTL != NULL);
475	assert(qd->TL != NULL);
476
477	/* Record the total number of matching documents */
478	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
479	exc = (*j_env)->ExceptionOccurred(j_env);
480	if (exc) {
481	(*j_env)->ExceptionDescribe(j_env);
482	return;
483	}
484
485	/* Record the matching documents, but only the number requested */
486	printf("Number of matching documents: %d\n", qd->DL->num);
487
488	for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
489	int doc_num = qd->DL->DE[i].DocNum;
490	float doc_weight = qd->DL->DE[i].Weight;
491
492	#if defined(PARADOCNUM) \|\| defined(NZDL)
493	if (qd->id->ifh.InvfLevel == 3) {
494	/* pararaph level, need to convert to doc level*/
495	doc_num = GetDocNumFromParaNum(qd, doc_num);
496	}
497	#endif
498
499
500	/* Call the addDoc function (Java side) to record a matching document */
501	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
502	(jlong) doc_num, (jfloat) doc_weight);
503	exc = (*j_env)->ExceptionOccurred(j_env);
504	if (exc) {
505	(*j_env)->ExceptionDescribe(j_env);
506	return;
507	}
508	}
509
510	/* Record the term information, if desired */
511	if (data->queryInfo->needTermFreqs) {
512	/* The following code is a lot more complicated than it could be, but it is necessary
513	to compensate for an oddity in MG. */
514	unsigned char** stemmed_terms = malloc(sizeof(unsigned char) qd->TL->num);
515
516	printf("Number of terms: %d\n", qd->TL->num);
517	printf("Number of query terms: %d\n", qd->QTL->num);
518
519	/* Generate the stemmed form of each of the relevant terms */
520	for (i = 0; i < qd->TL->num; i++) {
521	u_char* raw_term = qd->TL->TE[i].Word;
522	unsigned int term_length = raw_term[0];
523
524	u_char* raw_stemmed_term = malloc(term_length + 1);
525	unsigned int stemmed_term_length;
526
527	/* Copy the term, and stem it */
528	for (j = 0; j <= term_length; j++)
529	raw_stemmed_term[j] = raw_term[j];
530	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
531
532	/* Allocate memory to store the stemmed term, and fill it */
533	stemmed_term_length = raw_stemmed_term[0];
534	stemmed_terms[i] = malloc(stemmed_term_length + 1);
535	assert(stemmed_terms[i] != NULL);
536	strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
537	stemmed_terms[i][stemmed_term_length] = '\0';
538	}
539
540	/* Record every query term, along with their equivalent terms */
541	for (i = 0; i < qd->QTL->num; i++) {
542	u_char* raw_query_term = qd->QTL->QTE[i].Term;
543	unsigned int query_term_length = raw_query_term[0];
544	unsigned char* query_term;
545	jstring j_query_term;
546
547	u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
548	unsigned int stemmed_query_term_length;
549	unsigned char* stemmed_query_term;
550
551	/* Allocate memory to store the query term, and fill it */
552	query_term = malloc(query_term_length + 1);
553	assert(query_term != NULL);
554	strncpy(query_term, &(raw_query_term[1]), query_term_length);
555	query_term[query_term_length] = '\0';
556
557	/* Allocate a new jstring for the query term */
558	j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
559	assert(j_query_term != NULL);
560
561	/* Call the addTerm function (Java side) to record the query term */
562	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
563	j_query_term, (jint) data->defaultStemMethod);
564	exc = (*j_env)->ExceptionOccurred(j_env);
565	if (exc) {
566	(*j_env)->ExceptionDescribe(j_env);
567	return;
568	}
569
570	/* Copy the query term, and stem it */
571	for (j = 0; j <= query_term_length; j++)
572	raw_stemmed_query_term[j] = raw_query_term[j];
573	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
574
575	/* Allocate memory to store the stemmed query term, and fill it */
576	stemmed_query_term_length = raw_stemmed_query_term[0];
577	stemmed_query_term = malloc(stemmed_query_term_length + 1);
578	assert(stemmed_query_term != NULL);
579	strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
580	stemmed_query_term[stemmed_query_term_length] = '\0';
581
582	/* Find all the terms equivalent to the query term */
583	for (j = 0; j < qd->TL->num; j++) {
584	/* Check if the stemmed query term matches the stemmed term */
585	if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
586	u_char* raw_term = qd->TL->TE[j].Word;
587	unsigned int term_length = raw_term[0];
588	unsigned char* term;
589	jstring j_term;
590
591	/* Allocate memory to store the query term, and fill it */
592	term = malloc(term_length + 1);
593	assert(term != NULL);
594	strncpy(term, &(raw_term[1]), term_length);
595	term[term_length] = '\0';
596
597	/* Allocate a new jstring for the query term */
598	j_term = (*j_env)->NewStringUTF(j_env, term);
599	assert(j_term != NULL);
600
601	/* Call the addEquivTerm function (Java side) to record the equivalent term */
602	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
603	j_query_term, j_term,
604	(jlong) qd->TL->TE[j].WE.doc_count,
605	(jlong) qd->TL->TE[j].WE.count);
606	exc = (*j_env)->ExceptionOccurred(j_env);
607	if (exc) {
608	(*j_env)->ExceptionDescribe(j_env);
609	return;
610	}
611	}
612	}
613	}
614	}
615	}
616
617
618	/*******************************************
619	set query options
620	*******************************************/
621
622	/* Turn casefolding on or off */
623	JNIEXPORT void JNICALL
624	Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
625	jboolean j_on)
626	{
627	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
628
629	if (j_on) {
630	data->defaultStemMethod \|= 1;
631	} else {
632	data->defaultStemMethod &= 0xe;
633	}
634	}
635
636
637	/* Turn stemming on or off */
638	JNIEXPORT void JNICALL
639	Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
640	jboolean j_on)
641	{
642	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
643
644	if (j_on) {
645	data->defaultStemMethod \|= 2;
646	} else {
647	data->defaultStemMethod &= 0xd;
648	}
649	}
650
651
652	/* Set the maximum number of documents to return from a query */
653	JNIEXPORT void JNICALL
654	Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
655	jint j_max)
656	{
657	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
658	data->queryInfo->maxDocs = j_max;
659	}
660
661
662	/* Turn term frequency recording on or off */
663	JNIEXPORT void JNICALL
664	Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
665	jboolean j_on)
666	{
667	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
668	data->queryInfo->needTermFreqs = j_on;
669	}
670
671
672	/* Choose MG index to search */
673	JNIEXPORT void JNICALL
674	Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
675	jstring j_index)
676	{
677	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
678
679	/* Get the index name as a C string */
680	const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
681	assert(index != NULL);
682	printf("Choosing index %s...\n", index);
683
684	/* Free the previous index name */
685	if (data->queryInfo->index)
686	free(data->queryInfo->index);
687
688	/* Allocate memory for the index name, and fill it */
689	data->queryInfo->index = (char*) malloc(strlen(index) + 1);
690	assert(data->queryInfo->index != NULL);
691	strcpy(data->queryInfo->index, index);
692
693	/* Release the index string */
694	(*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
695	}
696
697
698	/* Choose boolean AND or boolean OR queries */
699	JNIEXPORT void JNICALL
700	Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
701	jint j_mode)
702	{
703	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
704	data->defaultBoolCombine = j_mode;
705	}
706
707
708	/* Get a text representation of the current parameter values */
709	JNIEXPORT jstring JNICALL
710	Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
711	{
712	MGWrapperData* data = (MGWrapperData) (j_env)->GetIntField(j_env, j_obj, FID_mg_data);
713	char result[512]; /* Assume this is big enough */
714
715	/* Print the data to a character array */
716	sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%ld\n",
717	(data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
718	(data->defaultStemMethod & 1),
719	(data->defaultStemMethod & 2),
720	(data->defaultBoolCombine == 1 ? "all" : "some"),
721	(data->queryInfo->maxDocs));
722
723	/* Convert to a jstring, and return */
724	return (*j_env)->NewStringUTF(j_env, result);
725	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: