root/main/trunk/greenstone2/common-src/indexers/mgpp/jni/MGPPSearchWrapperImpl.cpp @ 25246

Revision 25246, 17.8 KB (checked in by ak19, 8 years ago)

Needed to turn off debugging before previous commit

Line 
1/*
2 *    MGPPSearchWrapperImpl.cpp
3 *    Copyright (C) 2007 New Zealand Digital Library, http://www.nzdl.org
4 *
5 *    This program is free software; you can redistribute it and/or modify
6 *    it under the terms of the GNU General Public License as published by
7 *    the Free Software Foundation; either version 2 of the License, or
8 *    (at your option) any later version.
9 *
10 *    This program is distributed in the hope that it will be useful,
11 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 *    GNU General Public License for more details.
14 *
15 *    You should have received a copy of the GNU General Public License
16 *    along with this program; if not, write to the Free Software
17 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#ifdef __WIN32__
20#include <win32cfg.h>
21#include <strstream>
22#include <sstream>
23#else
24#ifdef __APPLE__
25#include <strstream>
26#include <sstream>
27#else
28#include <sstream>
29#endif
30#endif
31
32#include <jni.h>
33#include "org_greenstone_mgpp_MGPPSearchWrapper.h"
34#include "MGPPSearchWrapperImpl.h"
35#include "GSDLQueryParser.h"
36#include "MGQuery.h"
37
38// toggle debugging
39//#define _DEBUG
40 
41MGPPSearchData::MGPPSearchData() {
42  indexData = new IndexData();
43  queryInfo = new QueryInfo();
44
45  if (queryInfo==NULL) {
46    cerr<<"couldn't allocate new query info\n";
47    if (indexData!=NULL) {
48      delete indexData;
49    }
50  }
51
52  // set all the default params
53  SetCStr(queryInfo->docLevel, "Document"); // the level to search at
54  queryInfo->maxDocs = 50;
55  queryInfo->sortByRank = true;
56  queryInfo->exactWeights = false;
57  queryInfo->needRankInfo = true;
58  queryInfo->needTermFreqs = true;
59
60  UCArrayClear(level);
61  SetCStr(level, "Document"); // the level to return docs at
62  defaultStemMethod=0;
63  defaultBoolCombine=0;
64  maxNumeric = 4;
65}
66
67MGPPSearchData::~MGPPSearchData() {
68  if (indexData !=NULL) {
69    delete indexData;
70  }
71  if (queryInfo !=NULL) {
72    delete queryInfo;
73  }
74}
75
76// ********************************************
77// initialisation stuff
78// ********************************************
79
80// cached ids for java stuff
81jfieldID FID_mgpp_data = NULL; // MGPPSearchData
82jfieldID FID_query_result = NULL; // MGPPQueryResult
83jmethodID MID_addDoc=NULL; // MGPPQueryResult.addDoc()
84jmethodID MID_addTerm=NULL; // MGPPQueryResult.addTerm()
85jmethodID MID_setTotalDocs=NULL; // MGPPQueryResult.setTotalDocs()
86jmethodID MID_clearResult=NULL; //MGPPQueryResult.clear()
87jmethodID MID_setSyntaxError=NULL; // MGPPQueryResult.setSyntaxError()
88jclass CID_String=NULL; // class ID of String
89
90/* to access objects and methods on java side, need their field/method ids -
91 this initialises them at the start to avoid recalculating them each time they
92 are needed
93Note: the descriptors need to be exactly right, otherwise you get an error
94saying "no such field" but no reference to the fact that it has the right
95name but the wrong type.
96Note: apparently the jclass is a local ref and should only work
97in the method that created it. It seems to work ok, but I'll make it
98 global cos the book said I should, and it may avoid future hassles.
99*/
100JNIEXPORT void JNICALL
101Java_org_greenstone_mgpp_MGPPSearchWrapper_initIDs (JNIEnv *j_env, jclass j_cls) {
102 
103  FID_mgpp_data = j_env->GetFieldID(j_cls, "mgpp_data_ptr_", "J"); //a long-"J"
104  if (FID_mgpp_data==NULL) {
105      cerr <<"MGPP JNI: field mgpp_data_ptr_ not found"<<endl;
106  }
107
108  FID_query_result = j_env->GetFieldID(j_cls, "mgpp_query_result_", "Lorg/greenstone/mgpp/MGPPQueryResult;"); // an object -"L<class name>;"
109  if (FID_query_result==NULL) {
110      cerr <<"MGPP JNI: field mgpp_query_result_ not found"<<endl;
111  }
112  // the methods we want to use
113
114  // addDoc(long doc, float rank)
115  jclass JC_MGPPQueryResult = j_env->FindClass("org/greenstone/mgpp/MGPPQueryResult");
116  MID_addDoc = j_env->GetMethodID(JC_MGPPQueryResult, "addDoc", "(JF)V");
117  if (MID_addDoc==NULL) {
118      cerr <<"MGPP JNI: addDoc method not found"<<endl;
119  }
120  // addTerm(String term, String tag, int stem_method, long match_docs,
121  // long term_freq, String[] equiv_terms)
122  MID_addTerm = j_env->GetMethodID(JC_MGPPQueryResult, "addTerm", "(Ljava/lang/String;Ljava/lang/String;IJJ[Ljava/lang/String;)V");
123  if (MID_addTerm==NULL) {
124      cerr <<"MGPP JNI: method addTerm not found"<<endl;
125  }
126 
127  // setTotalDocs(long)
128  MID_setTotalDocs = j_env->GetMethodID(JC_MGPPQueryResult, "setTotalDocs", "(J)V");
129  if (MID_setTotalDocs==NULL) {
130      cerr <<"MGPP JNI: method setTotalDocs not found"<<endl;
131  }
132 
133  MID_clearResult = j_env->GetMethodID(JC_MGPPQueryResult, "clear", "()V");
134  if (MID_clearResult==NULL) {
135      cerr <<"MGPP JNI: method clear not found"<<endl;
136  }
137  MID_setSyntaxError = j_env->GetMethodID(JC_MGPPQueryResult, "setSyntaxError", "(Z)V");
138  if (MID_clearResult==NULL) {
139      cerr <<"MGPP JNI: method setSyntaxError not found"<<endl;
140  }
141
142  // get the class for String to use in NewObjectArray in runQuery()
143  // FindClass returns a local reference - have to convert it to a global one
144  jclass local_CID_String = j_env->FindClass("java/lang/String");
145  if (local_CID_String==NULL) {
146    cerr <<"MGPP JNI: java String class not found"<<endl;
147  } else {
148    /* create a global ref */
149    CID_String = (jclass)j_env->NewGlobalRef(local_CID_String);
150    /* The local reference is no longer useful */
151    j_env->DeleteLocalRef(local_CID_String);
152 
153    /* Is the global reference created successfully? */
154    if (CID_String == NULL) {
155      return; /* out of memory exception thrown */
156    }
157  }
158 
159}
160
161/* the java side MGPPSearchWrapper has a pointer to a C++ object - MGPPSearchData
162   initialise this and set the pointer
163*/
164JNIEXPORT jboolean JNICALL
165Java_org_greenstone_mgpp_MGPPSearchWrapper_initCppSide (JNIEnv *j_env, jobject j_obj){
166
167#ifdef _DEBUG
168  cerr << "**** JNI debugging for GS3. initCppSide: SetLongField()\n";
169#endif
170
171  MGPPSearchData * data = new MGPPSearchData();
172
173#ifdef _DEBUG
174  fprintf (stderr, "1a. data before SetLongField() is: %ld and as hex: %lX\n", data, data);
175  fprintf (stderr, "1b. FID_mgpp_data before SetLongField() is: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
176#endif
177
178  j_env->SetLongField(j_obj, FID_mgpp_data, (long)data);
179
180#ifdef _DEBUG
181  fprintf (stderr, "1a. data after SetLongField() is: %ld and as hex: %lX\n", data, data);
182  fprintf (stderr, "1b. FID_mgpp_data after SetLongField() is: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
183#endif
184
185  return true;
186
187}
188
189//******************************************
190// do a query
191// ****************************************
192
193/* load the IndexData - cached for querying
194 */
195JNIEXPORT jboolean JNICALL
196Java_org_greenstone_mgpp_MGPPSearchWrapper_loadIndexData (JNIEnv *j_env, jobject j_obj,  jstring j_index_name) {
197#ifdef _DEBUG
198  fprintf (stderr, "in loadIndexData\n");
199#endif
200
201  jlong data_ptr = j_env->GetLongField(j_obj, FID_mgpp_data);
202
203#ifdef _DEBUG
204  fprintf (stderr, "1. data_ptr at start is: %ld and as hex: %lX\n", data_ptr, data_ptr);
205#endif
206
207  MGPPSearchData * data = (MGPPSearchData *)data_ptr;
208
209#ifdef __WIN32__
210  const char* base_dir = "";
211#else
212  const char* base_dir = "/";
213#endif
214
215  const char * index_name = j_env->GetStringUTFChars( j_index_name, NULL);
216  if (index_name==NULL) {
217    return false;
218  }
219 
220  jboolean j_result=false;
221 
222  // why doesn't this complain about const??
223  if (data->indexData->LoadData(base_dir, index_name)) {
224    j_result=true;
225  }
226 
227  // release any gets
228  j_env->ReleaseStringUTFChars(j_index_name, index_name);
229 
230  return j_result;
231}
232
233/* unload the data
234 */
235JNIEXPORT jboolean JNICALL
236Java_org_greenstone_mgpp_MGPPSearchWrapper_unloadIndexData (JNIEnv *j_env, jobject j_obj) {
237
238#ifdef _DEBUG
239  fprintf (stderr, "in unloadIndexData\n");
240#endif
241
242  jlong data_ptr = j_env->GetLongField(j_obj, FID_mgpp_data);
243
244#ifdef _DEBUG
245  fprintf (stderr, "1. data_ptr at start is: %ld and as hex: %lX\n", data_ptr, data_ptr);
246#endif
247
248  MGPPSearchData * data = (MGPPSearchData *)data_ptr;
249
250  data->indexData->UnloadData();
251  return true;
252
253}
254
255/* do the actual query - the results are written to query_result held on the
256   java side */
257JNIEXPORT void JNICALL
258Java_org_greenstone_mgpp_MGPPSearchWrapper_runQuery (JNIEnv *j_env, jobject j_obj, jstring j_query){
259 
260  jthrowable exc; // an exception - check if something funny has happened
261  const char *query = j_env->GetStringUTFChars(j_query, NULL);
262  if (query==NULL) {
263    return; // exception already thrown
264  }
265  // turn to UCArray for mgpp and then release the string
266  UCArray queryArray;
267  SetCStr(queryArray, query);
268  j_env->ReleaseStringUTFChars(j_query, query);
269 
270  // the query data
271  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
272 
273  // the result to write to
274  jobject result_ptr = j_env->GetObjectField(j_obj, FID_query_result);
275  if (result_ptr==NULL) {
276    cerr <<"couldn't access the result to write to"<<endl;
277    return;
278  }
279 
280  // clear the result
281  j_env->CallVoidMethod(result_ptr, MID_clearResult);
282  exc = j_env->ExceptionOccurred(); // this catches the exception I think - it
283  //wont be thrown any further
284  if (exc) {
285    j_env->ExceptionDescribe();
286    return;
287  }
288  // the mgpp QueryResult that we will use
289  ExtQueryResult queryResult;
290
291  QueryNode * queryTree = NULL;
292  // parse the query string into a tree structure
293  queryTree = ParseQuery(queryArray, data->defaultBoolCombine,
294             data->defaultStemMethod, data->maxNumeric);
295  if (queryTree == NULL) {
296    // invalid syntax
297    j_env->CallVoidMethod(result_ptr, MID_setSyntaxError, true);
298    cerr << "MGPPSearchWrapperImpl: invalid query syntax!!\n";
299    return;
300  }
301  // print the query
302  PrintNode (cout, queryTree);
303  // finally, do the query
304  MGQuery(*(data->indexData), *(data->queryInfo), queryTree, queryResult, data->level);
305
306  delete queryTree;
307
308  // convert queryResult to the java side version
309  // use levels rather than docs of ExtQueryResult
310  // CallVoidMethod(obj, method id, args to method)
311  for (int i=0; i<queryResult.levels.size(); i++) {
312    jlong doc = queryResult.levels[i];
313    jfloat rank = queryResult.ranks[i];
314    j_env->CallVoidMethod(result_ptr, MID_addDoc, doc, rank);
315    exc = j_env->ExceptionOccurred();
316    if (exc) {
317      j_env->ExceptionDescribe();
318      return;
319    }
320   
321  }
322
323  // actual num of docs
324  jlong total = queryResult.actualNumDocs;
325  j_env->CallVoidMethod(result_ptr, MID_setTotalDocs, total);
326  exc = j_env->ExceptionOccurred();
327  if (exc) {
328    j_env->ExceptionDescribe();
329    return;
330  }
331
332  // the terms
333  for (int j=0; j<queryResult.termFreqs.size(); j++) {
334   
335    TermFreqData tf = queryResult.termFreqs[j];
336    jstring term = j_env->NewStringUTF(GetCStr(tf.term));
337    jstring tag = j_env->NewStringUTF(GetCStr(tf.tag));
338    jint stem = tf.stemMethod;
339    jlong match = tf.matchDocs;
340    jlong freq = tf.termFreq;
341   
342    jobjectArray equivs=NULL;
343    jstring empty = j_env->NewStringUTF(""); // the initial object to fill the array
344    jint num_equivs = tf.equivTerms.size();
345    equivs = j_env->NewObjectArray(num_equivs, CID_String, empty);
346    if (equivs==NULL) {
347      cerr<<"couldn't create object array"<<endl;
348     
349    } else {
350      for (int k=0; k<num_equivs;k++) {
351    jstring equiv = j_env->NewStringUTF(GetCStr(tf.equivTerms[k]));
352    j_env->SetObjectArrayElement(equivs, k, equiv);
353      }
354     
355     
356      j_env->CallVoidMethod(result_ptr, MID_addTerm, term, tag, stem, match, freq, equivs);
357      exc = j_env->ExceptionOccurred();
358      if (exc) {
359    j_env->ExceptionDescribe();
360    return;
361      }
362    }
363  }
364
365
366}
367
368JNIEXPORT void JNICALL
369Java_org_greenstone_mgpp_MGPPSearchWrapper_setStem (JNIEnv *j_env,
370                          jobject j_obj,
371                          jboolean j_on) {
372  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
373  if (j_on) {
374    data->defaultStemMethod |= 2;
375  } else {
376    data->defaultStemMethod &= 0xd;
377  }
378
379}
380
381JNIEXPORT void JNICALL
382Java_org_greenstone_mgpp_MGPPSearchWrapper_setAccentFold (JNIEnv *j_env,
383                          jobject j_obj,
384                          jboolean j_on) {
385  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
386  if (j_on) {
387    data->defaultStemMethod |= 4;
388  } else {
389    data->defaultStemMethod &= 0xb;
390  }
391}
392
393
394JNIEXPORT void JNICALL
395Java_org_greenstone_mgpp_MGPPSearchWrapper_setCase (JNIEnv *j_env,
396                          jobject j_obj,
397                          jboolean j_on) {
398  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
399
400  if (j_on) {
401    data->defaultStemMethod |= 1;
402  } else {
403    data->defaultStemMethod &= 0xe;
404  }
405}
406
407JNIEXPORT void JNICALL
408Java_org_greenstone_mgpp_MGPPSearchWrapper_setMaxDocs (JNIEnv *j_env,
409                         jobject j_obj,
410                         jint j_max) {
411  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
412  data->queryInfo->maxDocs=j_max;
413}
414
415JNIEXPORT void JNICALL
416Java_org_greenstone_mgpp_MGPPSearchWrapper_setMaxNumeric (JNIEnv *j_env,
417                         jobject j_obj,
418                         jint j_max) {
419  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
420  data->maxNumeric=j_max;
421}
422
423JNIEXPORT void JNICALL
424Java_org_greenstone_mgpp_MGPPSearchWrapper_setSortByRank (JNIEnv *j_env,
425                            jobject j_obj,
426                            jboolean j_on) {
427  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
428 
429  data->queryInfo->sortByRank=j_on;
430}
431
432JNIEXPORT void JNICALL
433Java_org_greenstone_mgpp_MGPPSearchWrapper_setReturnTerms(JNIEnv *j_env,
434                            jobject j_obj,
435                            jboolean j_on) {
436  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
437  data->queryInfo->needTermFreqs = j_on;
438 
439}
440
441JNIEXPORT void JNICALL
442Java_org_greenstone_mgpp_MGPPSearchWrapper_setQueryLevel(JNIEnv *j_env,
443                           jobject j_obj,
444                           jstring j_level){
445 
446  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
447
448  const char * level = j_env->GetStringUTFChars(j_level, NULL);
449  if (level==NULL) {
450    return; // exception already thrown
451  }
452 
453  data->queryInfo->docLevel.clear();
454  SetCStr(data->queryInfo->docLevel, level);
455 
456  // release the java stuff
457  j_env->ReleaseStringUTFChars(j_level, level);
458 
459}
460
461JNIEXPORT void JNICALL
462Java_org_greenstone_mgpp_MGPPSearchWrapper_setReturnLevel(JNIEnv *j_env,
463                            jobject j_obj,
464                            jstring j_level){
465  // print to stderr start of setReturnLevel, print out FID..
466  // %ld or %x -> need to print out pointer -
467  // Later consider field containing unsignedlong instead of long
468 
469#ifdef _DEBUG
470  cerr << "In MGPPSearchWrapperImpl.setReturnLevel()\n";
471  fprintf (stderr, "1. FID_mgpp_data at start is: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
472#endif
473
474  jlong data_ptr = j_env->GetLongField(j_obj, FID_mgpp_data);
475
476#ifdef _DEBUG
477  fprintf (stderr, "1a. data_ptr at start is: %ld and as hex: %lX\n", data_ptr, data_ptr);
478#endif
479
480  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
481
482#ifdef _DEBUG
483  fprintf (stderr, "2a. FID_mgpp_data after data instantiation: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
484  fprintf (stderr, "2b. Pointer value of data upon inst: %ld and as hex: %lX\n", data, data);
485#endif
486
487  // print out FID again.. as long decimal and as hex
488  // %ld or %x -> need to print out pointer -
489  // in the C code in the Setlong bit in this file
490  // And the place on the java side, maybe in the configure
491  //  org.greenstone.gsdl3.service.GS2MGPPSearch.configure(Lorg/w3c/dom/Element;Lorg/w3c/dom/Element;)Z+18
492  // find out what value we're SETTING the value of the pointer to
493  const char * level = j_env->GetStringUTFChars(j_level, NULL);
494  if (level==NULL) {
495    return; // exception already thrown
496  }
497
498#ifdef _DEBUG
499  fprintf (stderr, "3a. FID_mgpp_data after level: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
500  fprintf (stderr, "3b. Pointer value of data after level: %ld and as hex: %lX\n", data, data);
501#endif
502 
503  data->level.clear();
504
505#ifdef _DEBUG
506  fprintf (stderr, "4a. FID_mgpp_data after data->level.clear(): %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
507  fprintf (stderr, "4b. Pointer value of data after level.clear(): %ld and as hex: %lX\n", data, data);
508#endif
509
510  SetCStr(data->level, level);
511
512#ifdef _DEBUG
513  fprintf (stderr, "5a. FID_mgpp_data after SetCStr on data: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
514  fprintf (stderr, "5b. Pointer value of data after SetCStr: %ld and as hex: %lX\n", data, data);
515#endif
516 
517  // release the java stuff
518  j_env->ReleaseStringUTFChars(j_level, level);
519
520#ifdef _DEBUG
521  fprintf (stderr, "5a. FID_mgpp_data at end of setReturnLevel: %ld and as hex: %lX\n", FID_mgpp_data, FID_mgpp_data);
522  fprintf (stderr, "5b. Pointer value of data at end of SetReturnLevel: %ld and as hex: %lX\n", data, data);
523#endif
524
525}
526
527JNIEXPORT void JNICALL
528Java_org_greenstone_mgpp_MGPPSearchWrapper_setMatchMode (JNIEnv *j_env,
529                           jobject j_obj,
530                           jint j_mode){
531
532  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
533  data->defaultBoolCombine=j_mode;
534
535}
536
537JNIEXPORT jstring JNICALL
538Java_org_greenstone_mgpp_MGPPSearchWrapper_getQueryParams (JNIEnv *j_env,
539                             jobject j_obj){
540
541  MGPPSearchData * data = (MGPPSearchData *)j_env->GetLongField(j_obj, FID_mgpp_data);
542 
543  // print the data to a stringstream, then convert to char*, then to
544  //jstring
545
546  stringstream output;
547  output << "Query params:"<<endl
548    // need to change this to use platform specific separator for niceness
549     << "index\t\t"<<data->indexData->basePath<<"/"<<data->indexData->filename<<endl
550     <<"search level\t"<<GetCStr(data->queryInfo->docLevel)<<endl
551     <<"result level\t"<<GetCStr(data->level)<<endl
552     <<"casefold\t"<<(data->defaultStemMethod&1)<<endl
553     <<"stem\t\t"<<(data->defaultStemMethod&2)<<endl
554     <<"order by rank\t"<<data->queryInfo->sortByRank<<endl
555     <<"query type\t"<<(data->defaultBoolCombine==1?"all":"some")<<endl
556         <<"max docs\t"<<data->queryInfo->maxDocs<<endl<<ends;
557
558  const char *result = output.str().c_str();
559  jstring j_result = j_env->NewStringUTF(result);
560  delete (char *)result;
561  return j_result;
562}
Note: See TracBrowser for help on using the browser.