Context Navigation

source: main/trunk/greenstone2/common-src/indexers/mg/jni/MGWrapperImpl.c@ 26662

Last change on this file since 26662 was 26662, checked in by davidb, 11 years ago
Support for cross-compilation added. This particular set of changes focuses on flags that assist cross-compilation with JNI. It is comparable to the set of changes made for mgpp. Note the additional type-casting (intptr_t).
Property svn:keywords set to `Author Date Id Revision`
File size: 25.0 KB

Line
1	/*
2	* MGWrapperImpl.c
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <jni.h>
21
22	#ifdef __MINGW32__
23
24	// Cross compiling for Windows
25	// Want the type definitions in win32 version of jni_md.h but
26	// this then leads to C-mangled style functions which we don't
27	// want. The following achieves this
28
29	#undef JNIEXPORT
30	#undef JNIIMPORT
31	#undef JNICALL
32
33	#define JNIEXPORT
34	#define JNIIMPORT
35	#define JNICALL
36	#endif
37
38	#include "MGWrapperImpl.h"
39	#include "org_greenstone_mg_MGWrapper.h"
40
41
42	#include "backend.h"
43	#include "environment.h"
44	#include "text_get.h"
45	#include "stemmer.h"
46
47	/*************************************************************************
48	NOTES
49
50	- Features supported by MGPP but not by MG (AFAIK)
51
52	- Sorting results by rank.
53	Done here as a post-processing operation. Could be more efficient in
54	some cases: the current solution is not very good when the number of
55	matching documents is large and the number of desired matches is
56	small. In this case it would be better to iterate through the array
57	picking out the best documents rather than sorting them all.
58
59	- Asking for query term frequencies to be returned.
60	This cannot be turned off in MG. If the term frequencies are not
61	required, they are simply not passed back to the Java side.
62
63	- Choosing the index for queries.
64	It is possible for MG to build Section and Document indexes (for
65	example), but these are two separate indexes, and must be loaded
66	separately. This module can load more than one index at a time, thus
67	processing queries to different indexes more quickly.
68	NOTE: This replaces TWO options in the MGPP version: returnLevel and
69	queryLevel.
70
71	*************************************************************************/
72
73
74	#define MAX_INDEXES_CACHED 3
75
/* Copied from mgquery: converts a paragraph number into the number of the
   document that contains it (needed by Greenstone for paragraph-level
   indexes). */
#if defined(PARADOCNUM) || defined(NZDL)
/*
 * qd      - query data; reads qd->td->cth.num_of_docs and qd->paragraph.
 * paranum - paragraph number to look up.
 *
 * Binary search over documents 1..num_of_docs for the document d with
 *     Paragraph[d-1] < paranum <= Paragraph[d].
 * This presumes qd->paragraph is non-decreasing (cumulative paragraph
 * counts per document) -- TODO confirm against mgquery.
 *
 * Returns the document number. If no document matches, FatalError() is
 * called; 0 is returned should FatalError() come back.
 */
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1;
  int high = Documents;

  /* Stop as soon as the search interval is empty. The previous condition
     only checked that mid stayed inside 1..Documents; once low had moved
     past high, mid stayed equal to high, so a paragraph number larger than
     Paragraph[Documents] looped forever instead of reaching FatalError(). */
  while (low <= high) {
    int mid = low + (high - low) / 2;

    if (paranum > Paragraph[mid]) {
      low = mid + 1;
    } else if (paranum <= Paragraph[mid - 1]) {
      high = mid - 1;
    } else {
      return mid;
    }
  }

  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
98
99
100	/*********************************************
101	initialisation stuff
102	*********************************************/
103
104	/* cached ids for java stuff */
105	jfieldID FID_mg_data = NULL; /* MGWrapperData */
106	jfieldID FID_query_result = NULL; /* MGQueryResult */
107	jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
108	jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
109	jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
110	jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
111	jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
112
113
114	/* to access objects and methods on java side, need their field/method ids -
115	this initialises them at the start to avoid recalculating them each time they
116	are needed
117	Note: the descriptors need to be exactly right, otherwise you get an error
118	saying "no such field" but no reference to the fact that it has the right
119	name but the wrong type.
120	Note: apparently the jclass is a local ref and should only work
121	in the method that created it. It seems to work ok, but I'll make it
122	global cos the book said I should, and it may avoid future hassles.
123	*/
124	JNIEXPORT void JNICALL
125	Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
126	{
127	jclass JC_MGQueryResult;
128
129	/* a long-"J" */
130	FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
131	assert(FID_mg_data != NULL);
132
133	/* an object -"L<class name>;" */
134	FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
135	"Lorg/greenstone/mg/MGQueryResult;");
136	assert(FID_query_result != NULL);
137
138	/* the methods we want to use */
139	JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
140
141	/* addDoc(long doc, float rank) */
142	MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
143	assert(MID_addDoc != NULL);
144
145	/* addTerm(String term, int stem) */
146	MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
147	assert(MID_addTerm != NULL);
148
149	/* addEquivTerm(String term, String equivTerm, long match, long freq) */
150	MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
151	assert(MID_addEquivTerm != NULL);
152
153	/* setTotalDocs(long) */
154	MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
155	assert(MID_setTotalDocs != NULL);
156
157	/* clear(void) */
158	MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
159	assert(MID_clearResult != NULL);
160	}
161
162
163	/* the java side MGWrapper has a pointer to a C object - MGWrapperData
164	initialise this and set the pointer
165	*/
166	JNIEXPORT jboolean JNICALL
167	Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
168	{
169	/* Allocate a MGWrapperData object to store query parameters */
170	MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
171	assert(data != NULL);
172
173	/* Set default values - no stemming, no case-folding, boolean OR queries */
174	data->defaultStemMethod = 0;
175	data->defaultBoolCombine = 0;
176
177	/* Allocate a QueryInfo object to store more query parameters */
178	data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
179	assert(data->queryInfo != NULL);
180
181	/* Set default values - 50 documents max, return term freqs, sort by rank */
182	data->queryInfo->index = NULL;
183	data->queryInfo->maxDocs = 50;
184	data->queryInfo->needTermFreqs = 1;
185
186	/* Save the object on the Java side */
187	(*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
188
189	/* Initialise MG environment variables */
190	InitEnv();
191	SetEnv("expert", "true", NULL);
192	SetEnv("mode", "docnums", NULL);
193
194	return 1; /* true - no errors */
195	}
196
197
198	/*******************************************
199	Index caching
200	*******************************************/
201
202	query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
203
204
205	/* Get the index data necessary to perform a query or document retrieval */
206	query_data*
207	loadIndexData(char* base_dir, char* index_path, char* text_path)
208	{
209	char* index_path_name;
210	char* text_path_name;
211	query_data* qd;
212	int i = 0;
213
214	/* Form the path name of the desired indexes */
215	index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
216	assert(index_path_name != NULL);
217	strcpy(index_path_name, base_dir);
218	strcat(index_path_name, index_path);
219	printf("Index pathname: %s\n", index_path_name);
220
221	text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
222	assert(text_path_name != NULL);
223	strcpy(text_path_name, base_dir);
224	strcat(text_path_name, text_path);
225	printf("Text pathname: %s\n", text_path_name);
226
227	/* Search through the cached indexes for the desired one */
228	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
229	printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
230	printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
231
232	/* Check if the index has already been loaded */
233	if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
234	(strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
235	/* Index has already been loaded and cached, so return it */
236	printf("Found index!\n");
237	free(index_path_name);
238	free(text_path_name);
239	return cached_indexes[i];
240	}
241
242	i++;
243	}
244
245	/* Text strings no longer needed */
246	free(index_path_name);
247	free(text_path_name);
248
249	/* The index is not cached, so load it now */
250	qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
251	if (!qd) {
252	printf("Error: Could not InitQuerySystem()...\n");
253	return NULL;
254	}
255
256	/* The index loaded OK, so cache it */
257	/* This could be a little more sophisticated, eg. replace least frequently used index */
258	if (i >= MAX_INDEXES_CACHED)
259	i = MAX_INDEXES_CACHED - 1;
260
261	/* Free the index being replaced */
262	if (cached_indexes[i] != NULL)
263	FinishQuerySystem(cached_indexes[i]);
264
265	/* Cache the loaded index, and return it */
266	cached_indexes[i] = qd;
267	return cached_indexes[i];
268	}
269
270
271	/* Clean up by unloading all cached indexes */
272	JNIEXPORT jboolean JNICALL
273	Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
274	{
275	/* Free all the loaded indexes */
276	int i = 0;
277	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
278	FinishQuerySystem(cached_indexes[i]);
279	cached_indexes[i] = NULL;
280	i++;
281	}
282
283	return 1; /* true - no errors */
284	}
285
286
287	/****************************************************
288	retrieve a document
289	****************************************************/
290
291	/* Returns a document from mg as a string */
292	JNIEXPORT jstring JNICALL
293	Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
294	jstring j_base_dir, jstring j_text_path,
295	jlong j_docnum)
296	{
297	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
298
299	char* index_path;
300	const char* base_dir;
301	const char* text_path;
302	query_data* qd;
303
304	mg_u_long pos, len;
305	u_char* c_buffer = NULL;
306	u_char* uc_buffer = NULL;
307	int ULen;
308
309	jstring result;
310
311	/* Make sure an index has been specified */
312	index_path = data->queryInfo->index;
313	assert(index_path != NULL);
314
315	/* Obtain C versions of the two string parameters */
316	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
317	if (base_dir == NULL) {
318	return NULL;
319	}
320	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
321	if (text_path == NULL) {
322	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
323	return NULL;
324	}
325
326	/* Load the appropriate index for satisfying this request */
327	printf("Document retrieval, index path: %s\n", index_path);
328	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
329
330	/* The C text strings are no longer needed */
331	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
332	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
333
334	/* Check that the index was loaded successfully */
335	if (qd==NULL) {
336	return NULL;
337	}
338	/assert(qd != NULL);/
339
340	/* Get the document position and length in the text file */
341	printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
342	FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
343	printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
344
345	/* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
346	c_buffer = (u_char*) malloc(len);
347	assert(c_buffer != NULL);
348	uc_buffer = (u_char) malloc((int) (qd->td->cth.ratio 1.01 * len) + 100);
349	assert(uc_buffer != NULL);
350
351	/* Seek to the correct position in the file and read the document text */
352	Fseek (qd->td->TextFile, pos, 0);
353	Fread (c_buffer, 1, len, qd->td->TextFile);
354
355	/* Decompress the document text into another buffer, and terminate it */
356	DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
357	uc_buffer[ULen] = '\0';
358
359	/* Load the document text into a Java string */
360	result = (*j_env)->NewStringUTF(j_env, uc_buffer);
361	assert(result != NULL);
362
363	/* Free C buffers */
364	free(c_buffer);
365	free(uc_buffer);
366
367	/* Return the document text */
368	return result;
369	}
370
371
372	/*******************************************
373	do a query
374	*******************************************/
375
376	/* do the actual query - the results are written to query_result held on the Java side */
377	JNIEXPORT void JNICALL
378	Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
379	jstring j_base_dir, jstring j_text_path,
380	jstring j_query)
381	{
382	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
383
384	char* index_path;
385	const char* base_dir;
386	const char* text_path;
387	query_data* qd;
388
389	jobject result_ptr;
390	char* query;
391	int i, j;
392
393	jthrowable exc;
394	/* First of all, clear the previous result */
395	/* The result to write to */
396	result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
397	assert(result_ptr != NULL);
398
399	/* Clear any previous query results */
400	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
401	exc = (*j_env)->ExceptionOccurred(j_env);
402	if (exc) {
403	(*j_env)->ExceptionDescribe(j_env);
404	return;
405	}
406
407	/* Make sure an index has been specified */
408	index_path = data->queryInfo->index;
409	if (index_path == NULL) {
410	return;
411	}
412
413	/* Obtain C versions of the two string parameters */
414	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
415	if (base_dir == NULL) {
416	return;
417	}
418	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
419	if (text_path == NULL) {
420	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
421	return;
422	}
423
424	/* Load the appropriate index for satisfying this request */
425	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
426
427	/* The C text strings are no longer needed */
428	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
429	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
430
431	/* Check that the index was loaded successfully */
432	if (qd == NULL) {
433	return;
434	}
435
436	/* Remove anything hanging around from last time */
437	FreeQueryDocs(qd);
438
439	/* Obtain a C version of the query string */
440	query = (char) (j_env)->GetStringUTFChars(j_env, j_query, NULL);
441	if (query == NULL) {
442	return;
443	}
444	printf("Searching for query \"%s\"...\n", query);
445
446	/* Make sure the query isn't empty */
447	if (strlen(query) == 0) {
448	printf("Warning: Empty query.\n");
449	return;
450	}
451
452	/* "Some" queries are done as ranked queries */
453	if (data->defaultBoolCombine == 0) {
454	RankedQueryInfo rqi;
455	rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
456	rqi.Exact = 1; /* Perform exact ranking */
457	rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
458	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
459	/* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
460	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
461	rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
462	}
463
464	rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
465	rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
466	/* rqi.MaxAccums = -1; / / Use as many accumulators as necessary - CRASHES with list */
467	rqi.MaxAccums = 100000;
468	rqi.MaxTerms = -1; /* Use all the query terms */
469	/* rqi.StopAtMaxAccum = 0;/ / Don't care (using as many accumulators as necessary) */
470	rqi.StopAtMaxAccum = 1;
471	rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
472	rqi.skip_dump = NULL; /* Don't dump skip information */
473
474	/* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
475	SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
476	SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
477
478	RankedQuery(qd, query, &rqi);
479	}
480	/* "All" queries are done as boolean queries */
481	else {
482	BooleanQueryInfo bqi;
483	bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
484
485	/* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
486	BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
487	}
488
489	/* Finished with the C query string */
490	(*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
491
492	/* Check the query was processed successfully */
493	if (qd->DL == NULL \|\| qd->QTL == NULL \|\| qd->TL == NULL) {
494	return;
495	}
496
497	/* Record the total number of matching documents */
498	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
499	exc = (*j_env)->ExceptionOccurred(j_env);
500	if (exc) {
501	(*j_env)->ExceptionDescribe(j_env);
502	return;
503	}
504
505	/* Record the matching documents, but only the number requested */
506	printf("Number of matching documents: %d\n", qd->DL->num);
507
508	for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
509	int doc_num = qd->DL->DE[i].DocNum;
510	float doc_weight = qd->DL->DE[i].Weight;
511
512	#if defined(PARADOCNUM) \|\| defined(NZDL)
513	if (qd->id->ifh.InvfLevel == 3) {
514	/* pararaph level, need to convert to doc level*/
515	doc_num = GetDocNumFromParaNum(qd, doc_num);
516	}
517	#endif
518
519
520	/* Call the addDoc function (Java side) to record a matching document */
521	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
522	(jlong) doc_num, (jfloat) doc_weight);
523	exc = (*j_env)->ExceptionOccurred(j_env);
524	if (exc) {
525	(*j_env)->ExceptionDescribe(j_env);
526	return;
527	}
528	}
529
530	/* Record the term information, if desired */
531	if (data->queryInfo->needTermFreqs) {
532	/* The following code is a lot more complicated than it could be, but it is necessary
533	to compensate for an oddity in MG. */
534	unsigned char** stemmed_terms = malloc(sizeof(unsigned char) qd->TL->num);
535
536	printf("Number of terms: %d\n", qd->TL->num);
537	printf("Number of query terms: %d\n", qd->QTL->num);
538
539	/* Generate the stemmed form of each of the relevant terms */
540	for (i = 0; i < qd->TL->num; i++) {
541	u_char* raw_term = qd->TL->TE[i].Word;
542	unsigned int term_length = raw_term[0];
543
544	u_char* raw_stemmed_term = malloc(term_length + 1);
545	unsigned int stemmed_term_length;
546
547	/* Copy the term, and stem it */
548	for (j = 0; j <= term_length; j++)
549	raw_stemmed_term[j] = raw_term[j];
550	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
551
552	/* Allocate memory to store the stemmed term, and fill it */
553	stemmed_term_length = raw_stemmed_term[0];
554	stemmed_terms[i] = malloc(stemmed_term_length + 1);
555	assert(stemmed_terms[i] != NULL);
556	strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
557	stemmed_terms[i][stemmed_term_length] = '\0';
558	}
559
560	/* Record every query term, along with their equivalent terms */
561	for (i = 0; i < qd->QTL->num; i++) {
562	u_char* raw_query_term = qd->QTL->QTE[i].Term;
563	unsigned int query_term_length = raw_query_term[0];
564	unsigned char* query_term;
565	jstring j_query_term;
566
567	u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
568	unsigned int stemmed_query_term_length;
569	unsigned char* stemmed_query_term;
570
571	/* Allocate memory to store the query term, and fill it */
572	query_term = malloc(query_term_length + 1);
573	assert(query_term != NULL);
574	strncpy(query_term, &(raw_query_term[1]), query_term_length);
575	query_term[query_term_length] = '\0';
576
577	/* Allocate a new jstring for the query term */
578	j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
579	assert(j_query_term != NULL);
580
581	/* Call the addTerm function (Java side) to record the query term */
582	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
583	j_query_term, (jint) data->defaultStemMethod);
584	exc = (*j_env)->ExceptionOccurred(j_env);
585	if (exc) {
586	(*j_env)->ExceptionDescribe(j_env);
587	return;
588	}
589
590	/* Copy the query term, and stem it */
591	for (j = 0; j <= query_term_length; j++)
592	raw_stemmed_query_term[j] = raw_query_term[j];
593	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
594
595	/* Allocate memory to store the stemmed query term, and fill it */
596	stemmed_query_term_length = raw_stemmed_query_term[0];
597	stemmed_query_term = malloc(stemmed_query_term_length + 1);
598	assert(stemmed_query_term != NULL);
599	strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
600	stemmed_query_term[stemmed_query_term_length] = '\0';
601
602	/* Find all the terms equivalent to the query term */
603	for (j = 0; j < qd->TL->num; j++) {
604	/* Check if the stemmed query term matches the stemmed term */
605	if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
606	u_char* raw_term = qd->TL->TE[j].Word;
607	unsigned int term_length = raw_term[0];
608	unsigned char* term;
609	jstring j_term;
610
611	/* Allocate memory to store the query term, and fill it */
612	term = malloc(term_length + 1);
613	assert(term != NULL);
614	strncpy(term, &(raw_term[1]), term_length);
615	term[term_length] = '\0';
616
617	/* Allocate a new jstring for the query term */
618	j_term = (*j_env)->NewStringUTF(j_env, term);
619	assert(j_term != NULL);
620
621	/* Call the addEquivTerm function (Java side) to record the equivalent term */
622	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
623	j_query_term, j_term,
624	(jlong) qd->TL->TE[j].WE.doc_count,
625	(jlong) qd->TL->TE[j].WE.count);
626	exc = (*j_env)->ExceptionOccurred(j_env);
627	if (exc) {
628	(*j_env)->ExceptionDescribe(j_env);
629	return;
630	}
631	}
632	}
633	}
634	}
635	}
636
637
638	/*******************************************
639	set query options
640	*******************************************/
641
642	/* Turn casefolding on or off */
643	JNIEXPORT void JNICALL
644	Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
645	jboolean j_on)
646	{
647	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
648
649	if (j_on) {
650	data->defaultStemMethod \|= 1;
651	} else {
652	data->defaultStemMethod &= 0xe;
653	}
654	}
655
656
657	/* Turn stemming on or off */
658	JNIEXPORT void JNICALL
659	Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
660	jboolean j_on)
661	{
662	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
663
664	if (j_on) {
665	data->defaultStemMethod \|= 2;
666	} else {
667	data->defaultStemMethod &= 0xd;
668	}
669	}
670
671
672	/* Set the maximum number of documents to return from a query */
673	JNIEXPORT void JNICALL
674	Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
675	jint j_max)
676	{
677	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
678	data->queryInfo->maxDocs = j_max;
679	}
680
681	/* set the maximum number of numeric to split*/
682	JNIEXPORT void JNICALL
683	Java_org_greenstone_mg_MGWrapper_setMaxNumeric (JNIEnv *j_env,
684	jobject j_obj,
685	jint j_max) {
686
687	char text[20];
688	char* maxnumeric;
689	sprintf(text,"%d",j_max);
690	maxnumeric = text;
691	SetEnv("maxnumeric",maxnumeric, NULL);
692	}
693
694
695	/* Turn term frequency recording on or off */
696	JNIEXPORT void JNICALL
697	Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
698	jboolean j_on)
699	{
700	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
701	data->queryInfo->needTermFreqs = j_on;
702	}
703
704
705	/* Choose MG index to search */
706	JNIEXPORT void JNICALL
707	Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
708	jstring j_index)
709	{
710	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
711
712	/* Get the index name as a C string */
713	const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
714	assert(index != NULL);
715	printf("Choosing index %s...\n", index);
716
717	/* Free the previous index name */
718	if (data->queryInfo->index)
719	free(data->queryInfo->index);
720
721	/* Allocate memory for the index name, and fill it */
722	data->queryInfo->index = (char*) malloc(strlen(index) + 1);
723	assert(data->queryInfo->index != NULL);
724	strcpy(data->queryInfo->index, index);
725
726	/* Release the index string */
727	(*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
728	}
729
730
731	/* Choose boolean AND or boolean OR queries */
732	JNIEXPORT void JNICALL
733	Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
734	jint j_mode)
735	{
736	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
737	data->defaultBoolCombine = j_mode;
738	}
739
740
741	/* Get a text representation of the current parameter values */
742	JNIEXPORT jstring JNICALL
743	Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
744	{
745	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
746	char result[512]; /* Assume this is big enough */
747
748	/* Print the data to a character array */
749	sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
750	(data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
751	(data->defaultStemMethod & 1),
752	(data->defaultStemMethod & 2),
753	(data->defaultBoolCombine == 1 ? "all" : "some"),
754	(data->queryInfo->maxDocs));
755
756	/* Convert to a jstring, and return */
757	return (*j_env)->NewStringUTF(j_env, result);
758	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: