source: indexers/trunk/mgpp/jni/MGPPWrapperImpl.cpp@ 14463

Last change on this file since 14463 was 13479, checked in by shaoqun, 17 years ago

added the set accentfolding method

  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1#ifdef __WIN32__
2#include <WIN32cfg.h>
3#include <strstream>
4#include <sstream>
5#else
6#ifdef __APPLE__
7#include <strstream>
8#include <sstream>
9#else
10#include <sstream>
11#endif
12#endif
13
14#include <jni.h>
15#include "org_greenstone_mgpp_MGPPWrapper.h"
16#include "MGPPWrapperImpl.h"
17#include "TextGet.h"
18#include "GSDLQueryParser.h"
19#include "MGQuery.h"
20
21MGPPWrapperData::MGPPWrapperData() {
22 indexData = new IndexData();
23 queryInfo = new QueryInfo();
24
25 if (queryInfo==NULL) {
26 cerr<<"couldn't allocate new query info\n";
27 if (indexData!=NULL) {
28 delete indexData;
29 }
30 }
31
32 // set all the default params
33 SetCStr(queryInfo->docLevel, "Document"); // the level to search at
34 queryInfo->maxDocs = 50;
35 queryInfo->sortByRank = true;
36 queryInfo->exactWeights = false;
37 queryInfo->needRankInfo = true;
38 queryInfo->needTermFreqs = true;
39
40 UCArrayClear(level);
41 SetCStr(level, "Document"); // the level to return docs at
42 defaultStemMethod=0;
43 defaultBoolCombine=0;
44 maxNumeric = 4;
45}
46
47MGPPWrapperData::~MGPPWrapperData() {
48 if (indexData !=NULL) {
49 delete indexData;
50 }
51 if (queryInfo !=NULL) {
52 delete queryInfo;
53 }
54}
55
56// ********************************************
57// initialisation stuff
58// ********************************************
59
60// cached ids for java stuff
61jfieldID FID_mgpp_data = NULL; // MGPPWrapperData
62jfieldID FID_query_result = NULL; // MGPPQueryResult
63jmethodID MID_addDoc=NULL; // MGPPQueryResult.addDoc()
64jmethodID MID_addTerm=NULL; // MGPPQueryResult.addTerm()
65jmethodID MID_setTotalDocs=NULL; // MGPPQueryResult.setTotalDocs()
66jmethodID MID_clearResult=NULL; //MGPPQueryResult.clear()
67jmethodID MID_setSyntaxError=NULL; // MGPPQueryResult.setSyntaxError()
68jclass CID_String=NULL; // class ID of String
69
70/* to access objects and methods on java side, need their field/method ids -
71 this initialises them at the start to avoid recalculating them each time they
72 are needed
73Note: the descriptors need to be exactly right, otherwise you get an error
74saying "no such field" but no reference to the fact that it has the right
75name but the wrong type.
76Note: apparently the jclass is a local ref and should only work
77in the method that created it. It seems to work ok, but I'll make it
78 global cos the book said I should, and it may avoid future hassles.
79*/
80JNIEXPORT void JNICALL
81Java_org_greenstone_mgpp_MGPPWrapper_initIDs (JNIEnv *j_env, jclass j_cls) {
82
83 FID_mgpp_data = j_env->GetFieldID(j_cls, "mgpp_data_ptr_", "J"); //a long-"J"
84 if (FID_mgpp_data==NULL) {
85 cerr <<"MGPP JNI: field mgpp_data_ptr_ not found"<<endl;
86 }
87
88 FID_query_result = j_env->GetFieldID(j_cls, "mgpp_query_result_", "Lorg/greenstone/mgpp/MGPPQueryResult;"); // an object -"L<class name>;"
89 if (FID_query_result==NULL) {
90 cerr <<"MGPP JNI: field mgpp_query_result_ not found"<<endl;
91 }
92 // the methods we want to use
93
94 // addDoc(long doc, float rank)
95 jclass JC_MGPPQueryResult = j_env->FindClass("org/greenstone/mgpp/MGPPQueryResult");
96 MID_addDoc = j_env->GetMethodID(JC_MGPPQueryResult, "addDoc", "(JF)V");
97 if (MID_addDoc==NULL) {
98 cerr <<"MGPP JNI: addDoc method not found"<<endl;
99 }
100 // addTerm(String term, String tag, int stem_method, long match_docs,
101 // long term_freq, String[] equiv_terms)
102 MID_addTerm = j_env->GetMethodID(JC_MGPPQueryResult, "addTerm", "(Ljava/lang/String;Ljava/lang/String;IJJ[Ljava/lang/String;)V");
103 if (MID_addTerm==NULL) {
104 cerr <<"MGPP JNI: method addTerm not found"<<endl;
105 }
106
107 // setTotalDocs(long)
108 MID_setTotalDocs = j_env->GetMethodID(JC_MGPPQueryResult, "setTotalDocs", "(J)V");
109 if (MID_setTotalDocs==NULL) {
110 cerr <<"MGPP JNI: method setTotalDocs not found"<<endl;
111 }
112
113 MID_clearResult = j_env->GetMethodID(JC_MGPPQueryResult, "clear", "()V");
114 if (MID_clearResult==NULL) {
115 cerr <<"MGPP JNI: method clear not found"<<endl;
116 }
117 MID_setSyntaxError = j_env->GetMethodID(JC_MGPPQueryResult, "setSyntaxError", "(Z)V");
118 if (MID_clearResult==NULL) {
119 cerr <<"MGPP JNI: method setSyntaxError not found"<<endl;
120 }
121
122 // get the class for String to use in NewObjectArray in runQuery()
123 // FindClass returns a local reference - have to convert it to a global one
124 jclass local_CID_String = j_env->FindClass("java/lang/String");
125 if (local_CID_String==NULL) {
126 cerr <<"MGPP JNI: java String class not found"<<endl;
127 } else {
128 /* create a global ref */
129 CID_String = (jclass)j_env->NewGlobalRef(local_CID_String);
130 /* The local reference is no longer useful */
131 j_env->DeleteLocalRef(local_CID_String);
132
133 /* Is the global reference created successfully? */
134 if (CID_String == NULL) {
135 return; /* out of memory exception thrown */
136 }
137 }
138
139}
140
141/* the java side MGPPWrapper has a pointer to a C++ object - MGPPWrapperData
142 initialise this and set the pointer
143*/
144JNIEXPORT jboolean JNICALL
145Java_org_greenstone_mgpp_MGPPWrapper_initCppSide (JNIEnv *j_env, jobject j_obj){
146
147 MGPPWrapperData * data = new MGPPWrapperData();
148 j_env->SetIntField(j_obj, FID_mgpp_data, (long)data);
149
150 return true;
151
152}
153
154//****************************************************
155// retrieve a document
156//****************************************************
157
158/* returns a document from mgpp as a string
159Note: TextData isn't cached - just reloaded each time
160*/
161JNIEXPORT jstring JNICALL
162Java_org_greenstone_mgpp_MGPPWrapper_getDocument (JNIEnv *j_env,
163 jobject j_obj, jstring j_text_name, jstring j_level, jlong j_docnum) {
164
165#ifdef __WIN32__
166 const char* base_dir = "";
167#else
168 const char* base_dir = "/";
169#endif
170
171 const char * text_name = j_env->GetStringUTFChars(j_text_name, NULL);
172 if (text_name==NULL) {
173 return NULL;
174 }
175
176 const char * level = j_env->GetStringUTFChars( j_level, NULL);
177 if (level==NULL) {
178 j_env->ReleaseStringUTFChars(j_text_name, text_name);
179 return NULL;
180 }
181
182 // does this work alright? j_docnum is a long (64 bit)
183 unsigned long docnum = j_docnum;
184 TextData td;
185
186 // cast to char* otherwise complains about const
187 td.LoadData((char *)base_dir, (char *)text_name);
188
189 UCArray mg_level;
190 SetCStr(mg_level, level);
191 UCArray docText;
192 docText.clear();
193 // get the actual text
194 if (!GetDocText(td, mg_level, docnum, docText)) {
195 cerr <<"MGPP JNI: couldn't retrieve doc text"<<endl;
196 }
197
198 td.UnloadData();
199
200 char * doc = GetCStr(docText); // do I need to free this char *??
201 jstring result = j_env->NewStringUTF(doc);
202 // release any gets
203 j_env->ReleaseStringUTFChars(j_text_name, text_name);
204 j_env->ReleaseStringUTFChars(j_level, level);
205
206 // free any C++ stuff
207 delete doc;
208
209 return result;
210
211}
212
213//******************************************
214// do a query
215// ****************************************
216
217/* load the IndexData - cached for querying
218 */
219JNIEXPORT jboolean JNICALL
220Java_org_greenstone_mgpp_MGPPWrapper_loadIndexData (JNIEnv *j_env, jobject j_obj, jstring j_index_name) {
221
222 jint data_ptr = j_env->GetIntField(j_obj, FID_mgpp_data);
223 MGPPWrapperData * data = (MGPPWrapperData *)data_ptr;
224
225#ifdef __WIN32__
226 const char* base_dir = "";
227#else
228 const char* base_dir = "/";
229#endif
230
231 const char * index_name = j_env->GetStringUTFChars( j_index_name, NULL);
232 if (index_name==NULL) {
233 return false;
234 }
235
236 jboolean j_result=false;
237
238 // why doesn't this complain about const??
239 if (data->indexData->LoadData(base_dir, index_name)) {
240 j_result=true;
241 }
242
243 // release any gets
244 j_env->ReleaseStringUTFChars(j_index_name, index_name);
245
246 return j_result;
247}
248
249/* unload the data
250 */
251JNIEXPORT jboolean JNICALL
252Java_org_greenstone_mgpp_MGPPWrapper_unloadIndexData (JNIEnv *j_env, jobject j_obj) {
253
254 jint data_ptr = j_env->GetIntField(j_obj, FID_mgpp_data);
255 MGPPWrapperData * data = (MGPPWrapperData *)data_ptr;
256
257 data->indexData->UnloadData();
258 return true;
259
260}
261
262/* do the actual query - the results are written to query_result held on the
263 java side */
264JNIEXPORT void JNICALL
265Java_org_greenstone_mgpp_MGPPWrapper_runQuery (JNIEnv *j_env, jobject j_obj, jstring j_query){
266
267 jthrowable exc; // an exception - check if something funny has happened
268 const char *query = j_env->GetStringUTFChars(j_query, NULL);
269 if (query==NULL) {
270 return; // exception already thrown
271 }
272 // turn to UCArray for mgpp and then release the string
273 UCArray queryArray;
274 SetCStr(queryArray, query);
275 j_env->ReleaseStringUTFChars(j_query, query);
276
277 // the query data
278 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
279
280 // the result to write to
281 jobject result_ptr = j_env->GetObjectField(j_obj, FID_query_result);
282 if (result_ptr==NULL) {
283 cerr <<"couldn't access the result to write to"<<endl;
284 return;
285 }
286
287 // clear the result
288 j_env->CallVoidMethod(result_ptr, MID_clearResult);
289 exc = j_env->ExceptionOccurred(); // this catches teh exception I think - it
290 //wont be thrown any further
291 if (exc) {
292 j_env->ExceptionDescribe();
293 return;
294 }
295 // the mgpp QueryResult that we will use
296 ExtQueryResult queryResult;
297
298 QueryNode * queryTree = NULL;
299 // parse the query string into a tree structure
300 queryTree = ParseQuery(queryArray, data->defaultBoolCombine,
301 data->defaultStemMethod, data->maxNumeric);
302 if (queryTree == NULL) {
303 // invalid syntax
304 j_env->CallVoidMethod(result_ptr, MID_setSyntaxError, true);
305 cerr << "MGPPWrapperImpl: invalid query syntax!!\n";
306 return;
307 }
308 // print the query
309 PrintNode (cout, queryTree);
310 // finally, do the query
311 MGQuery(*(data->indexData), *(data->queryInfo), queryTree, queryResult, data->level);
312
313 delete queryTree;
314
315 // convert queryResult to the java side version
316 // use levels rather than docs of ExtQueryResult
317 // CallVoidMethod(obj, method id, args to method)
318 for (int i=0; i<queryResult.levels.size(); i++) {
319 jlong doc = queryResult.levels[i];
320 jfloat rank = queryResult.ranks[i];
321 j_env->CallVoidMethod(result_ptr, MID_addDoc, doc, rank);
322 exc = j_env->ExceptionOccurred();
323 if (exc) {
324 j_env->ExceptionDescribe();
325 return;
326 }
327
328 }
329
330 // actual num of docs
331 jlong total = queryResult.actualNumDocs;
332 j_env->CallVoidMethod(result_ptr, MID_setTotalDocs, total);
333 exc = j_env->ExceptionOccurred();
334 if (exc) {
335 j_env->ExceptionDescribe();
336 return;
337 }
338
339 // the terms
340 for (int j=0; j<queryResult.termFreqs.size(); j++) {
341
342 TermFreqData tf = queryResult.termFreqs[j];
343 jstring term = j_env->NewStringUTF(GetCStr(tf.term));
344 jstring tag = j_env->NewStringUTF(GetCStr(tf.tag));
345 jint stem = tf.stemMethod;
346 jlong match = tf.matchDocs;
347 jlong freq = tf.termFreq;
348
349 jobjectArray equivs=NULL;
350 jstring empty = j_env->NewStringUTF(""); // the initial object to fill the array
351 jint num_equivs = tf.equivTerms.size();
352 equivs = j_env->NewObjectArray(num_equivs, CID_String, empty);
353 if (equivs==NULL) {
354 cerr<<"couldn't create object array"<<endl;
355
356 } else {
357 for (int k=0; k<num_equivs;k++) {
358 jstring equiv = j_env->NewStringUTF(GetCStr(tf.equivTerms[k]));
359 j_env->SetObjectArrayElement(equivs, k, equiv);
360 }
361
362
363 j_env->CallVoidMethod(result_ptr, MID_addTerm, term, tag, stem, match, freq, equivs);
364 exc = j_env->ExceptionOccurred();
365 if (exc) {
366 j_env->ExceptionDescribe();
367 return;
368 }
369 }
370 }
371
372
373}
374
375JNIEXPORT void JNICALL
376Java_org_greenstone_mgpp_MGPPWrapper_setStem (JNIEnv *j_env,
377 jobject j_obj,
378 jboolean j_on) {
379 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
380 if (j_on) {
381 data->defaultStemMethod |= 2;
382 } else {
383 data->defaultStemMethod &= 0xd;
384 }
385
386}
387
388JNIEXPORT void JNICALL
389Java_org_greenstone_mgpp_MGPPWrapper_setAccentFold (JNIEnv *j_env,
390 jobject j_obj,
391 jboolean j_on) {
392 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
393 if (j_on) {
394 data->defaultStemMethod |= 4;
395 } else {
396 data->defaultStemMethod &= 0xb;
397 }
398}
399
400
401JNIEXPORT void JNICALL
402Java_org_greenstone_mgpp_MGPPWrapper_setCase (JNIEnv *j_env,
403 jobject j_obj,
404 jboolean j_on) {
405 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
406
407 if (j_on) {
408 data->defaultStemMethod |= 1;
409 } else {
410 data->defaultStemMethod &= 0xe;
411 }
412}
413
414JNIEXPORT void JNICALL
415Java_org_greenstone_mgpp_MGPPWrapper_setMaxDocs (JNIEnv *j_env,
416 jobject j_obj,
417 jint j_max) {
418 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
419 data->queryInfo->maxDocs=j_max;
420}
421
422JNIEXPORT void JNICALL
423Java_org_greenstone_mgpp_MGPPWrapper_setMaxNumeric (JNIEnv *j_env,
424 jobject j_obj,
425 jint j_max) {
426 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
427 data->maxNumeric=j_max;
428}
429
430JNIEXPORT void JNICALL
431Java_org_greenstone_mgpp_MGPPWrapper_setSortByRank (JNIEnv *j_env,
432 jobject j_obj,
433 jboolean j_on) {
434 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
435
436 data->queryInfo->sortByRank=j_on;
437}
438
439JNIEXPORT void JNICALL
440Java_org_greenstone_mgpp_MGPPWrapper_setReturnTerms(JNIEnv *j_env,
441 jobject j_obj,
442 jboolean j_on) {
443 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
444 data->queryInfo->needTermFreqs = j_on;
445
446}
447
448JNIEXPORT void JNICALL
449Java_org_greenstone_mgpp_MGPPWrapper_setQueryLevel(JNIEnv *j_env,
450 jobject j_obj,
451 jstring j_level){
452
453 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
454
455 const char * level = j_env->GetStringUTFChars(j_level, NULL);
456 if (level==NULL) {
457 return; // exception already thrown
458 }
459
460 data->queryInfo->docLevel.clear();
461 SetCStr(data->queryInfo->docLevel, level);
462
463 // release the java stuff
464 j_env->ReleaseStringUTFChars(j_level, level);
465
466}
467
468JNIEXPORT void JNICALL
469Java_org_greenstone_mgpp_MGPPWrapper_setReturnLevel(JNIEnv *j_env,
470 jobject j_obj,
471 jstring j_level){
472
473 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
474
475 const char * level = j_env->GetStringUTFChars(j_level, NULL);
476 if (level==NULL) {
477 return; // exception already thrown
478 }
479
480 data->level.clear();
481 SetCStr(data->level, level);
482
483 // release the java stuff
484 j_env->ReleaseStringUTFChars(j_level, level);
485
486
487}
488
489JNIEXPORT void JNICALL
490Java_org_greenstone_mgpp_MGPPWrapper_setMatchMode (JNIEnv *j_env,
491 jobject j_obj,
492 jint j_mode){
493
494 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
495 data->defaultBoolCombine=j_mode;
496
497}
498
499JNIEXPORT jstring JNICALL
500Java_org_greenstone_mgpp_MGPPWrapper_getQueryParams (JNIEnv *j_env,
501 jobject j_obj){
502
503 MGPPWrapperData * data = (MGPPWrapperData *)j_env->GetIntField(j_obj, FID_mgpp_data);
504
505 // print the data to a stringstream, then convert to char*, then to
506 //jstring
507
508 stringstream output;
509 output << "Query params:"<<endl
510 // need to change this to use platform specific separator for niceness
511 << "index\t\t"<<data->indexData->basePath<<"/"<<data->indexData->filename<<endl
512 <<"search level\t"<<GetCStr(data->queryInfo->docLevel)<<endl
513 <<"result level\t"<<GetCStr(data->level)<<endl
514 <<"casefold\t"<<(data->defaultStemMethod&1)<<endl
515 <<"stem\t\t"<<(data->defaultStemMethod&2)<<endl
516 <<"order by rank\t"<<data->queryInfo->sortByRank<<endl
517 <<"query type\t"<<(data->defaultBoolCombine==1?"all":"some")<<endl
518 <<"max docs\t"<<data->queryInfo->maxDocs<<endl<<ends;
519
520 const char *result = output.str().c_str();
521 jstring j_result = j_env->NewStringUTF(result);
522 delete (char *)result;
523 return j_result;
524}
Note: See TracBrowser for help on using the repository browser.