Context Navigation

source: main/trunk/greenstone2/common-src/indexers/mg/jni/MGWrapperImpl.c@ 25244

Last change on this file since 25244 was 25244, checked in by ak19, 12 years ago
GS3 tomcat server crashes because java crashes owing to some error in the JNI code. The error may be related to pointers having been stored as int rather than long, an issue that's become noticeable on 64 bit linux machines. Changes have been made in the JNI code where these pointers that are transferred between Java and C++ code are stored (GetIntField and SetIntField to GetLongField and SetLongField, as well as declaration of data_ptr as jlong not jint). Committing code first without debug statements so the commits can easily be done separately.
Property svn:keywords set to `Author Date Id Revision`
File size: 24.6 KB

Rev	Line
[3743]	1	/*
	2	* MGWrapperImpl.c
	3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	18	*/
	19
	20
#include "MGWrapperImpl.h"
#include <jni.h>
#include <stdint.h>
#include "org_greenstone_mg_MGWrapper.h"

#include "backend.h"
#include "environment.h"
#include "text_get.h"
#include "stemmer.h"
[3743]	29
	30	/*************************************************************************
	31	NOTES
	32
	33	- Features supported by MGPP but not by MG (AFAIK)
	34
	35	- Sorting results by rank.
	36	Done here as a post-processing operation. Could be more efficient in
	37	some cases: the current solution is not very good when the number of
	38	matching documents is large and the number of desired matches is
	39	small. In this case it would be better to iterate through the array
	40	picking out the best documents rather than sorting them all.
	41
	42	- Asking for query term frequencies to be returned.
	43	This cannot be turned off in MG. If the term frequencies are not
[4714]	44	required, they are simply not passed back to the Java side.
[3743]	45
	46	- Choosing the index for queries.
	47	It is possible for MG to build Section and Document indexes (for
	48	example), but these are two separate indexes, and must be loaded
	49	separately. This module can load more than one index at a time, thus
	50	processing queries to different indexes more quickly.
	51	NOTE: This replaces TWO options in the MGPP version: returnLevel and
	52	queryLevel.
	53
	54	*************************************************************************/
	55
	56
/* Number of slots in the cached_indexes array, i.e. the most indexes that
   are kept loaded at the same time */
#define MAX_INDEXES_CACHED 3
[3743]	58
/* Copied from mgquery; needed to convert paragraph numbers to document
   numbers for greenstone.

   Binary-searches qd->paragraph for the 1-based document number d that
   satisfies Paragraph[d-1] < paranum <= Paragraph[d], and returns d.
   If no document matches, the error is reported through FatalError() and,
   should that call return, 0 is returned. */
#if defined(PARADOCNUM) || defined(NZDL)
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  /* Stop as soon as the search range is empty.  The earlier condition only
     tested that mid stayed inside [1, Documents]; a paranum larger than
     Paragraph[Documents] then left low == Documents + 1 and
     mid == Documents on every pass, so the loop never finished. */
  while (low <= high)
    {
      int mid = low + (high - low) / 2;

      if (paranum > Paragraph[mid])
        low = mid + 1;
      else if (paranum <= Paragraph[mid - 1])
        high = mid - 1;
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}
#endif
	81
	82
[3743]	83	/*********************************************
	84	initialisation stuff
	85	*********************************************/
	86
	87	/* cached ids for java stuff */
	88	jfieldID FID_mg_data = NULL; /* MGWrapperData */
	89	jfieldID FID_query_result = NULL; /* MGQueryResult */
	90	jmethodID MID_addDoc = NULL; /* MGQueryResult.addDoc() */
	91	jmethodID MID_addTerm = NULL; /* MGQueryResult.addTerm() */
[3791]	92	jmethodID MID_addEquivTerm = NULL; /* MGQueryResult.addEquivTerm() */
[3743]	93	jmethodID MID_setTotalDocs = NULL; /* MGQueryResult.setTotalDocs() */
	94	jmethodID MID_clearResult = NULL; /* MGQueryResult.clear() */
	95
	96
	97	/* to access objects and methods on java side, need their field/method ids -
	98	this initialises them at the start to avoid recalculating them each time they
	99	are needed
	100	Note: the descriptors need to be exactly right, otherwise you get an error
	101	saying "no such field" but no reference to the fact that it has the right
	102	name but the wrong type.
	103	Note: apparently the jclass is a local ref and should only work
	104	in the method that created it. It seems to work ok, but I'll make it
	105	global cos the book said I should, and it may avoid future hassles.
	106	*/
	107	JNIEXPORT void JNICALL
	108	Java_org_greenstone_mg_MGWrapper_initIDs(JNIEnv *j_env, jclass j_cls)
	109	{
	110	jclass JC_MGQueryResult;
	111
	112	/* a long-"J" */
	113	FID_mg_data = (*j_env)->GetFieldID(j_env, j_cls, "mg_data_ptr_", "J");
	114	assert(FID_mg_data != NULL);
	115
	116	/* an object -"L<class name>;" */
	117	FID_query_result = (*j_env)->GetFieldID(j_env, j_cls, "mg_query_result_",
	118	"Lorg/greenstone/mg/MGQueryResult;");
	119	assert(FID_query_result != NULL);
	120
	121	/* the methods we want to use */
	122	JC_MGQueryResult = (*j_env)->FindClass(j_env, "org/greenstone/mg/MGQueryResult");
	123
	124	/* addDoc(long doc, float rank) */
	125	MID_addDoc = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addDoc", "(JF)V");
	126	assert(MID_addDoc != NULL);
	127
[3791]	128	/* addTerm(String term, int stem) */
	129	MID_addTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addTerm", "(Ljava/lang/String;I)V");
[3743]	130	assert(MID_addTerm != NULL);
	131
[3791]	132	/* addEquivTerm(String term, String equivTerm, long match, long freq) */
	133	MID_addEquivTerm = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "addEquivTerm", "(Ljava/lang/String;Ljava/lang/String;JJ)V");
	134	assert(MID_addEquivTerm != NULL);
	135
[3743]	136	/* setTotalDocs(long) */
	137	MID_setTotalDocs = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "setTotalDocs", "(J)V");
	138	assert(MID_setTotalDocs != NULL);
	139
	140	/* clear(void) */
	141	MID_clearResult = (*j_env)->GetMethodID(j_env, JC_MGQueryResult, "clear", "()V");
	142	assert(MID_clearResult != NULL);
	143	}
	144
	145
	146	/* the java side MGWrapper has a pointer to a C object - MGWrapperData
	147	initialise this and set the pointer
	148	*/
	149	JNIEXPORT jboolean JNICALL
	150	Java_org_greenstone_mg_MGWrapper_initCSide(JNIEnv *j_env, jobject j_obj)
	151	{
	152	/* Allocate a MGWrapperData object to store query parameters */
	153	MGWrapperData* data = (MGWrapperData*) malloc(sizeof(MGWrapperData));
	154	assert(data != NULL);
	155
	156	/* Set default values - no stemming, no case-folding, boolean OR queries */
	157	data->defaultStemMethod = 0;
	158	data->defaultBoolCombine = 0;
	159
	160	/* Allocate a QueryInfo object to store more query parameters */
	161	data->queryInfo = (QueryInfo*) malloc(sizeof(QueryInfo));
	162	assert(data->queryInfo != NULL);
	163
	164	/* Set default values - 50 documents max, return term freqs, sort by rank */
	165	data->queryInfo->index = NULL;
	166	data->queryInfo->maxDocs = 50;
	167	data->queryInfo->needTermFreqs = 1;
	168
	169	/* Save the object on the Java side */
[25244]	170	(*j_env)->SetLongField(j_env, j_obj, FID_mg_data, (long) data);
[3743]	171
	172	/* Initialise MG environment variables */
	173	InitEnv();
	174	SetEnv("expert", "true", NULL);
	175	SetEnv("mode", "docnums", NULL);
[13288]	176
[3743]	177	return 1; /* true - no errors */
	178	}
	179
	180
	181	/*******************************************
	182	Index caching
	183	*******************************************/
	184
	185	query_data* cached_indexes[MAX_INDEXES_CACHED] = { NULL };
	186
	187
	188	/* Get the index data necessary to perform a query or document retrieval */
	189	query_data*
	190	loadIndexData(char* base_dir, char* index_path, char* text_path)
	191	{
	192	char* index_path_name;
	193	char* text_path_name;
	194	query_data* qd;
	195	int i = 0;
	196
	197	/* Form the path name of the desired indexes */
	198	index_path_name = (char*) malloc(strlen(base_dir) + strlen(index_path) + 1);
	199	assert(index_path_name != NULL);
	200	strcpy(index_path_name, base_dir);
	201	strcat(index_path_name, index_path);
	202	printf("Index pathname: %s\n", index_path_name);
	203
	204	text_path_name = (char*) malloc(strlen(base_dir) + strlen(text_path) + 1);
	205	assert(text_path_name != NULL);
	206	strcpy(text_path_name, base_dir);
	207	strcat(text_path_name, text_path);
	208	printf("Text pathname: %s\n", text_path_name);
	209
	210	/* Search through the cached indexes for the desired one */
	211	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
	212	printf("(Cached) Pathname: %s\n", cached_indexes[i]->pathname);
	213	printf("(Cached) Textpathname: %s\n", cached_indexes[i]->textpathname);
	214
	215	/* Check if the index has already been loaded */
	216	if ((strcmp(index_path_name, cached_indexes[i]->pathname) == 0) &&
	217	(strcmp(text_path_name, cached_indexes[i]->textpathname) == 0)) {
	218	/* Index has already been loaded and cached, so return it */
	219	printf("Found index!\n");
	220	free(index_path_name);
	221	free(text_path_name);
	222	return cached_indexes[i];
	223	}
	224
	225	i++;
	226	}
	227
	228	/* Text strings no longer needed */
	229	free(index_path_name);
	230	free(text_path_name);
	231
	232	/* The index is not cached, so load it now */
	233	qd = InitQuerySystem(base_dir, index_path, text_path, NULL);
	234	if (!qd) {
	235	printf("Error: Could not InitQuerySystem()...\n");
	236	return NULL;
	237	}
	238
	239	/* The index loaded OK, so cache it */
	240	/* This could be a little more sophisticated, eg. replace least frequently used index */
	241	if (i >= MAX_INDEXES_CACHED)
	242	i = MAX_INDEXES_CACHED - 1;
	243
	244	/* Free the index being replaced */
	245	if (cached_indexes[i] != NULL)
	246	FinishQuerySystem(cached_indexes[i]);
	247
	248	/* Cache the loaded index, and return it */
	249	cached_indexes[i] = qd;
	250	return cached_indexes[i];
	251	}
	252
	253
	254	/* Clean up by unloading all cached indexes */
	255	JNIEXPORT jboolean JNICALL
	256	Java_org_greenstone_mg_MGWrapper_unloadIndexData(JNIEnv* j_env, jobject j_obj)
	257	{
	258	/* Free all the loaded indexes */
	259	int i = 0;
	260	while (i < MAX_INDEXES_CACHED && cached_indexes[i] != NULL) {
	261	FinishQuerySystem(cached_indexes[i]);
	262	cached_indexes[i] = NULL;
	263	i++;
	264	}
	265
	266	return 1; /* true - no errors */
	267	}
	268
	269
	270	/****************************************************
	271	retrieve a document
	272	****************************************************/
	273
	274	/* Returns a document from mg as a string */
	275	JNIEXPORT jstring JNICALL
	276	Java_org_greenstone_mg_MGWrapper_getDocument(JNIEnv *j_env, jobject j_obj,
	277	jstring j_base_dir, jstring j_text_path,
	278	jlong j_docnum)
	279	{
[25244]	280	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	281
	282	char* index_path;
	283	const char* base_dir;
	284	const char* text_path;
	285	query_data* qd;
	286
[25147]	287	mg_u_long pos, len;
[3743]	288	u_char* c_buffer = NULL;
	289	u_char* uc_buffer = NULL;
	290	int ULen;
	291
	292	jstring result;
	293
	294	/* Make sure an index has been specified */
	295	index_path = data->queryInfo->index;
	296	assert(index_path != NULL);
	297
	298	/* Obtain C versions of the two string parameters */
	299	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
	300	if (base_dir == NULL) {
	301	return NULL;
	302	}
	303	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
	304	if (text_path == NULL) {
	305	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
	306	return NULL;
	307	}
	308
	309	/* Load the appropriate index for satisfying this request */
[3791]	310	printf("Document retrieval, index path: %s\n", index_path);
[3743]	311	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
	312
	313	/* The C text strings are no longer needed */
	314	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
	315	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
	316
	317	/* Check that the index was loaded successfully */
[9874]	318	if (qd==NULL) {
	319	return NULL;
	320	}
	321	/assert(qd != NULL);/
[3743]	322
	323	/* Get the document position and length in the text file */
[25147]	324	printf("Fetching document number %d...\n", (mg_u_long) j_docnum);
	325	FetchDocStart(qd, (mg_u_long) j_docnum, &pos, &len);
	326	printf("Fetched document start. Pos: %d, Len: %d\n", pos, len);
[3743]	327
	328	/* Allocate memory for the document text (from mg/src/text/mgquery.c:RawDocOutput()) */
	329	c_buffer = (u_char*) malloc(len);
	330	assert(c_buffer != NULL);
	331	uc_buffer = (u_char) malloc((int) (qd->td->cth.ratio 1.01 * len) + 100);
	332	assert(uc_buffer != NULL);
	333
	334	/* Seek to the correct position in the file and read the document text */
	335	Fseek (qd->td->TextFile, pos, 0);
	336	Fread (c_buffer, 1, len, qd->td->TextFile);
	337
[3791]	338	/* Decompress the document text into another buffer, and terminate it */
[3743]	339	DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
	340	uc_buffer[ULen] = '\0';
	341
	342	/* Load the document text into a Java string */
	343	result = (*j_env)->NewStringUTF(j_env, uc_buffer);
	344	assert(result != NULL);
	345
	346	/* Free C buffers */
	347	free(c_buffer);
	348	free(uc_buffer);
	349
	350	/* Return the document text */
	351	return result;
	352	}
	353
	354
	355	/*******************************************
	356	do a query
	357	*******************************************/
	358
	359	/* do the actual query - the results are written to query_result held on the Java side */
	360	JNIEXPORT void JNICALL
	361	Java_org_greenstone_mg_MGWrapper_runQuery(JNIEnv *j_env, jobject j_obj,
	362	jstring j_base_dir, jstring j_text_path,
	363	jstring j_query)
	364	{
[25244]	365	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	366
	367	char* index_path;
	368	const char* base_dir;
	369	const char* text_path;
	370	query_data* qd;
	371
	372	jobject result_ptr;
	373	char* query;
	374	int i, j;
	375
	376	jthrowable exc;
[9874]	377	/* First of all, clear the previous result */
	378	/* The result to write to */
	379	result_ptr = (*j_env)->GetObjectField(j_env, j_obj, FID_query_result);
	380	assert(result_ptr != NULL);
	381
	382	/* Clear any previous query results */
	383	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_clearResult);
	384	exc = (*j_env)->ExceptionOccurred(j_env);
	385	if (exc) {
	386	(*j_env)->ExceptionDescribe(j_env);
	387	return;
	388	}
	389
[3743]	390	/* Make sure an index has been specified */
	391	index_path = data->queryInfo->index;
[11021]	392	if (index_path == NULL) {
	393	return;
	394	}
[3743]	395
	396	/* Obtain C versions of the two string parameters */
	397	base_dir = (*j_env)->GetStringUTFChars(j_env, j_base_dir, NULL);
	398	if (base_dir == NULL) {
	399	return;
	400	}
	401	text_path = (*j_env)->GetStringUTFChars(j_env, j_text_path, NULL);
	402	if (text_path == NULL) {
	403	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
	404	return;
	405	}
	406
	407	/* Load the appropriate index for satisfying this request */
	408	qd = loadIndexData((char) base_dir, (char) index_path, (char*) text_path);
	409
	410	/* The C text strings are no longer needed */
	411	(*j_env)->ReleaseStringUTFChars(j_env, j_base_dir, base_dir);
	412	(*j_env)->ReleaseStringUTFChars(j_env, j_text_path, text_path);
	413
	414	/* Check that the index was loaded successfully */
[9874]	415	if (qd == NULL) {
	416	return;
	417	}
[11021]	418
[3743]	419	/* Remove anything hanging around from last time */
	420	FreeQueryDocs(qd);
	421
	422	/* Obtain a C version of the query string */
	423	query = (char) (j_env)->GetStringUTFChars(j_env, j_query, NULL);
[11021]	424	if (query == NULL) {
	425	return;
	426	}
[3743]	427	printf("Searching for query \"%s\"...\n", query);
	428
	429	/* Make sure the query isn't empty */
	430	if (strlen(query) == 0) {
	431	printf("Warning: Empty query.\n");
	432	return;
	433	}
	434
[3791]	435	/* "Some" queries are done as ranked queries */
[3743]	436	if (data->defaultBoolCombine == 0) {
[3791]	437	RankedQueryInfo rqi;
	438	rqi.QueryFreqs = 1; /* Use the frequency of each query term in the query - OK? */
	439	rqi.Exact = 1; /* Perform exact ranking */
	440	rqi.MaxDocsToRetrieve = data->queryInfo->maxDocs; /* Get only the desired number */
	441	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve; /* OK? */
[8919]	442	/* we may need to get more paragraphs to get enough documents. I copied the following from mgquery. it seems to work, not sure why - kjdon */
	443	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas")) {
	444	rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
	445	}
	446
[3791]	447	rqi.Sort = 1; /* Sort the query terms by frequency before ranking */
	448	rqi.AccumMethod = 'L'; /* Use a list when accumulating (has bugs though...) */
[7629]	449	/* rqi.MaxAccums = -1; / / Use as many accumulators as necessary - CRASHES with list */
[3791]	450	rqi.MaxAccums = 100000;
	451	rqi.MaxTerms = -1; /* Use all the query terms */
[7629]	452	/* rqi.StopAtMaxAccum = 0;/ / Don't care (using as many accumulators as necessary) */
[3791]	453	rqi.StopAtMaxAccum = 1;
	454	rqi.HashTblSize = 1000; /* Don't care (not using a hash table) */
	455	rqi.skip_dump = NULL; /* Don't dump skip information */
	456
	457	/* RankedQuery() reads 'casefold' and 'stem' parameters from the environment */
	458	SetEnv("casefold", ((data->defaultStemMethod & 1) ? "on" : "off"), NULL);
	459	SetEnv("stem", ((data->defaultStemMethod & 2) ? "on" : "off"), NULL);
	460
	461	RankedQuery(qd, query, &rqi);
[3743]	462	}
[3791]	463	/* "All" queries are done as boolean queries */
	464	else {
	465	BooleanQueryInfo bqi;
	466	bqi.MaxDocsToRetrieve = data->queryInfo->maxDocs;
[3743]	467
[3791]	468	/* Had to add "words$o" to LIB_OBJS in mg/src/text/Makefile and recompile mg for this */
	469	BooleanQuery(qd, query, &bqi, data->defaultStemMethod);
	470	}
[3743]	471
	472	/* Finished with the C query string */
	473	(*j_env)->ReleaseStringUTFChars(j_env, j_query, query);
	474
	475	/* Check the query was processed successfully */
[11021]	476	if (qd->DL == NULL \|\| qd->QTL == NULL \|\| qd->TL == NULL) {
	477	return;
	478	}
[3743]	479
	480	/* Record the total number of matching documents */
	481	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_setTotalDocs, (jlong) qd->DL->num);
	482	exc = (*j_env)->ExceptionOccurred(j_env);
	483	if (exc) {
	484	(*j_env)->ExceptionDescribe(j_env);
	485	return;
	486	}
	487
	488	/* Record the matching documents, but only the number requested */
	489	printf("Number of matching documents: %d\n", qd->DL->num);
[8919]	490
[3743]	491	for (i = 0; (i < qd->DL->num && i < data->queryInfo->maxDocs); i++) {
	492	int doc_num = qd->DL->DE[i].DocNum;
[8920]	493	float doc_weight = qd->DL->DE[i].Weight;
	494
[8919]	495	#if defined(PARADOCNUM) \|\| defined(NZDL)
	496	if (qd->id->ifh.InvfLevel == 3) {
	497	/* pararaph level, need to convert to doc level*/
	498	doc_num = GetDocNumFromParaNum(qd, doc_num);
	499	}
	500	#endif
[8920]	501
	502
[3743]	503	/* Call the addDoc function (Java side) to record a matching document */
	504	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addDoc,
	505	(jlong) doc_num, (jfloat) doc_weight);
	506	exc = (*j_env)->ExceptionOccurred(j_env);
	507	if (exc) {
	508	(*j_env)->ExceptionDescribe(j_env);
	509	return;
	510	}
	511	}
	512
	513	/* Record the term information, if desired */
	514	if (data->queryInfo->needTermFreqs) {
[3791]	515	/* The following code is a lot more complicated than it could be, but it is necessary
	516	to compensate for an oddity in MG. */
	517	unsigned char** stemmed_terms = malloc(sizeof(unsigned char) qd->TL->num);
	518
[3743]	519	printf("Number of terms: %d\n", qd->TL->num);
	520	printf("Number of query terms: %d\n", qd->QTL->num);
	521
[3791]	522	/* Generate the stemmed form of each of the relevant terms */
	523	for (i = 0; i < qd->TL->num; i++) {
	524	u_char* raw_term = qd->TL->TE[i].Word;
	525	unsigned int term_length = raw_term[0];
	526
	527	u_char* raw_stemmed_term = malloc(term_length + 1);
	528	unsigned int stemmed_term_length;
	529
	530	/* Copy the term, and stem it */
	531	for (j = 0; j <= term_length; j++)
	532	raw_stemmed_term[j] = raw_term[j];
	533	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_term);
	534
	535	/* Allocate memory to store the stemmed term, and fill it */
	536	stemmed_term_length = raw_stemmed_term[0];
	537	stemmed_terms[i] = malloc(stemmed_term_length + 1);
	538	assert(stemmed_terms[i] != NULL);
	539	strncpy(stemmed_terms[i], &(raw_stemmed_term[1]), stemmed_term_length);
	540	stemmed_terms[i][stemmed_term_length] = '\0';
	541	}
	542
	543	/* Record every query term, along with their equivalent terms */
[3743]	544	for (i = 0; i < qd->QTL->num; i++) {
[3791]	545	u_char* raw_query_term = qd->QTL->QTE[i].Term;
	546	unsigned int query_term_length = raw_query_term[0];
[3743]	547	unsigned char* query_term;
	548	jstring j_query_term;
	549
[3791]	550	u_char* raw_stemmed_query_term = malloc(query_term_length + 1);
	551	unsigned int stemmed_query_term_length;
	552	unsigned char* stemmed_query_term;
	553
	554	/* Allocate memory to store the query term, and fill it */
	555	query_term = malloc(query_term_length + 1);
[3743]	556	assert(query_term != NULL);
[3791]	557	strncpy(query_term, &(raw_query_term[1]), query_term_length);
[3743]	558	query_term[query_term_length] = '\0';
	559
	560	/* Allocate a new jstring for the query term */
	561	j_query_term = (*j_env)->NewStringUTF(j_env, query_term);
	562	assert(j_query_term != NULL);
	563
[3791]	564	/* Call the addTerm function (Java side) to record the query term */
	565	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addTerm,
	566	j_query_term, (jint) data->defaultStemMethod);
	567	exc = (*j_env)->ExceptionOccurred(j_env);
	568	if (exc) {
	569	(*j_env)->ExceptionDescribe(j_env);
	570	return;
	571	}
	572
	573	/* Copy the query term, and stem it */
	574	for (j = 0; j <= query_term_length; j++)
	575	raw_stemmed_query_term[j] = raw_query_term[j];
	576	stemmer(data->defaultStemMethod, qd->sd->sdh.stemmer_num, raw_stemmed_query_term);
	577
	578	/* Allocate memory to store the stemmed query term, and fill it */
	579	stemmed_query_term_length = raw_stemmed_query_term[0];
	580	stemmed_query_term = malloc(stemmed_query_term_length + 1);
	581	assert(stemmed_query_term != NULL);
	582	strncpy(stemmed_query_term, &(raw_stemmed_query_term[1]), stemmed_query_term_length);
	583	stemmed_query_term[stemmed_query_term_length] = '\0';
	584
	585	/* Find all the terms equivalent to the query term */
[3743]	586	for (j = 0; j < qd->TL->num; j++) {
[3791]	587	/* Check if the stemmed query term matches the stemmed term */
	588	if (strcmp(stemmed_query_term, stemmed_terms[j]) == 0) {
	589	u_char* raw_term = qd->TL->TE[j].Word;
	590	unsigned int term_length = raw_term[0];
	591	unsigned char* term;
	592	jstring j_term;
[3743]	593
[3791]	594	/* Allocate memory to store the query term, and fill it */
	595	term = malloc(term_length + 1);
	596	assert(term != NULL);
	597	strncpy(term, &(raw_term[1]), term_length);
	598	term[term_length] = '\0';
[3743]	599
[3791]	600	/* Allocate a new jstring for the query term */
	601	j_term = (*j_env)->NewStringUTF(j_env, term);
	602	assert(j_term != NULL);
	603
	604	/* Call the addEquivTerm function (Java side) to record the equivalent term */
	605	(*j_env)->CallVoidMethod(j_env, result_ptr, MID_addEquivTerm,
	606	j_query_term, j_term,
	607	(jlong) qd->TL->TE[j].WE.doc_count,
	608	(jlong) qd->TL->TE[j].WE.count);
[3743]	609	exc = (*j_env)->ExceptionOccurred(j_env);
	610	if (exc) {
	611	(*j_env)->ExceptionDescribe(j_env);
	612	return;
	613	}
	614	}
	615	}
	616	}
	617	}
	618	}
	619
	620
	621	/*******************************************
	622	set query options
	623	*******************************************/
	624
	625	/* Turn casefolding on or off */
	626	JNIEXPORT void JNICALL
	627	Java_org_greenstone_mg_MGWrapper_setCase(JNIEnv *j_env, jobject j_obj,
	628	jboolean j_on)
	629	{
[25244]	630	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	631
	632	if (j_on) {
	633	data->defaultStemMethod \|= 1;
	634	} else {
	635	data->defaultStemMethod &= 0xe;
	636	}
	637	}
	638
	639
	640	/* Turn stemming on or off */
	641	JNIEXPORT void JNICALL
	642	Java_org_greenstone_mg_MGWrapper_setStem(JNIEnv *j_env, jobject j_obj,
	643	jboolean j_on)
	644	{
[25244]	645	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	646
	647	if (j_on) {
	648	data->defaultStemMethod \|= 2;
	649	} else {
	650	data->defaultStemMethod &= 0xd;
	651	}
	652	}
	653
	654
	655	/* Set the maximum number of documents to return from a query */
	656	JNIEXPORT void JNICALL
	657	Java_org_greenstone_mg_MGWrapper_setMaxDocs(JNIEnv *j_env, jobject j_obj,
	658	jint j_max)
	659	{
[25244]	660	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	661	data->queryInfo->maxDocs = j_max;
	662	}
	663
[13288]	664	/* set the maximum number of numeric to split*/
	665	JNIEXPORT void JNICALL
	666	Java_org_greenstone_mg_MGWrapper_setMaxNumeric (JNIEnv *j_env,
	667	jobject j_obj,
	668	jint j_max) {
[3743]	669
[13288]	670	char text[20];
	671	char* maxnumeric;
	672	sprintf(text,"%d",j_max);
	673	maxnumeric = text;
	674	SetEnv("maxnumeric",maxnumeric, NULL);
	675	}
	676
	677
[3743]	678	/* Turn term frequency recording on or off */
	679	JNIEXPORT void JNICALL
	680	Java_org_greenstone_mg_MGWrapper_setReturnTerms(JNIEnv *j_env, jobject j_obj,
	681	jboolean j_on)
	682	{
[25244]	683	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	684	data->queryInfo->needTermFreqs = j_on;
	685	}
	686
	687
	688	/* Choose MG index to search */
	689	JNIEXPORT void JNICALL
	690	Java_org_greenstone_mg_MGWrapper_setIndex(JNIEnv *j_env, jobject j_obj,
	691	jstring j_index)
	692	{
[25244]	693	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	694
	695	/* Get the index name as a C string */
	696	const char* index = (*j_env)->GetStringUTFChars(j_env, j_index, NULL);
	697	assert(index != NULL);
	698	printf("Choosing index %s...\n", index);
	699
	700	/* Free the previous index name */
	701	if (data->queryInfo->index)
	702	free(data->queryInfo->index);
	703
	704	/* Allocate memory for the index name, and fill it */
	705	data->queryInfo->index = (char*) malloc(strlen(index) + 1);
	706	assert(data->queryInfo->index != NULL);
	707	strcpy(data->queryInfo->index, index);
	708
	709	/* Release the index string */
	710	(*j_env)->ReleaseStringUTFChars(j_env, j_index, index);
	711	}
	712
	713
	714	/* Choose boolean AND or boolean OR queries */
	715	JNIEXPORT void JNICALL
	716	Java_org_greenstone_mg_MGWrapper_setMatchMode(JNIEnv *j_env, jobject j_obj,
	717	jint j_mode)
	718	{
[25244]	719	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	720	data->defaultBoolCombine = j_mode;
	721	}
	722
	723
	724	/* Get a text representation of the current parameter values */
	725	JNIEXPORT jstring JNICALL
	726	Java_org_greenstone_mg_MGWrapper_getQueryParams(JNIEnv *j_env, jobject j_obj)
	727	{
[25244]	728	MGWrapperData* data = (MGWrapperData) (j_env)->GetLongField(j_env, j_obj, FID_mg_data);
[3743]	729	char result[512]; /* Assume this is big enough */
	730
	731	/* Print the data to a character array */
[25147]	732	sprintf(result, "Query params:\nindex\t\t%s\ncasefold\t%d\nstem\t\t%d\nquery type\t%s\nmax docs\t%d\n",
[3743]	733	(data->queryInfo->index == NULL ? "<none loaded>" : data->queryInfo->index),
	734	(data->defaultStemMethod & 1),
	735	(data->defaultStemMethod & 2),
	736	(data->defaultBoolCombine == 1 ? "all" : "some"),
	737	(data->queryInfo->maxDocs));
	738
	739	/* Convert to a jstring, and return */
	740	return (*j_env)->NewStringUTF(j_env, result);
	741	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: