Changeset 319 for trunk/gsdl/src/colservr/mgsearch.cpp
- Timestamp: 1999-06-30T16:04:14+12:00 (25 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/src/colservr/mgsearch.cpp
r301 r319 12 12 /* 13 13 $Log$ 14 Revision 1.8 1999/06/30 04:04:12 rjmcnab 15 made stemming functions available from mgsearch and made the stems 16 for the query terms available in queryinfo 17 14 18 Revision 1.7 1999/06/27 22:07:27 sjboddie 15 19 got rid of all the old functions for dealing with dir indexes … … 86 90 87 91 92 ////////////////////// 93 // useful functions // 94 ////////////////////// 95 96 97 // input and output are in utf8 98 text_t mgsearch_stemword (const text_t &word) { 99 // allocate working stem space 100 int maxstemlen = mgq_getmaxstemlen (); 101 unsigned char *word_stem = new unsigned char [maxstemlen + 2]; 102 if (word_stem == NULL) return ""; 103 104 // copy word to word_stem 105 int len = 0; 106 text_t::const_iterator here = word.begin(); 107 text_t::const_iterator end = word.end(); 108 while (len < maxstemlen && here != end) { 109 word_stem[len+1] = (unsigned char)(*here); 110 len++; here++; 111 } 112 word_stem[len+1] = '\0'; 113 word_stem[0] = len; 114 115 mgq_stemword (word_stem); 116 117 // copy word_stem back to tempstr 118 text_t tempstr; 119 tempstr.setcarr((char *)(&word_stem[1]), word_stem[0]); 120 121 return tempstr; 122 } 123 124 125 88 126 //////////////////////// 89 127 // callback functions // … … 121 159 docresultclass docresult; 122 160 docresult.docnum = DocNum; 123 docresult.docweight = Weight; 124 161 docresult.num_query_terms_matched = (int)(Weight/100.0); // will always be 0 on some versions of mg... 
162 docresult.docweight = Weight - docresult.num_query_terms_matched*100; 163 125 164 queryresults->docs.push_back(docresult); 126 165 … … 137 176 termfreqclass termfreq; 138 177 termfreq.termstr = to_uni(term); 178 termfreq.termstemstr = to_uni (mgsearch_stemword (term)); 139 179 termfreq.termfreq = Freq; 140 queryresults-> terms.push_back(termfreq);180 queryresults->orgterms.push_back(termfreq); 141 181 142 182 return 0; … … 144 184 145 185 // this callback is called once for each variation of each term 146 int term scallback(char *Word, int ULen, int /*Freq*/,147 186 int termvariantscallback(char *Word, int ULen, int /*Freq*/, 187 float /*Weight*/, void *info) { 148 188 149 189 text_t term; … … 215 255 } 216 256 257 // you only need to use this function before doing any stemming 258 // casefolding and stemming will be set if values for them are 259 // provided (0 or 1). 260 // makeindexcurrent returns true if it was able to load the database 261 bool mgsearchclass::makeindexcurrent (const text_t &index, 262 const text_t &collection, 263 int casefolding, 264 int stemming) { 265 bool databaseloaded = true; 266 267 // get the names of the collection, index and text suffixes 268 char *ccollection = collection.getcstr(); 269 assert (ccollection != NULL); 270 char *idxsuffix = (getindexsuffix (collection, index)).getcstr(); 271 assert (idxsuffix != NULL); 272 char *txtsuffix = (getindexsuffix (collection, "text")).getcstr(); 273 assert (txtsuffix != NULL); 274 275 #ifdef __WIN32__ 276 char *ccollectdir = (collectdir+"\\").getcstr(); assert (ccollectdir != NULL); 277 #else 278 char *ccollectdir = collectdir.getcstr(); assert (ccollectdir != NULL); 279 #endif 280 281 if (load_database(ccollection, ccollectdir, idxsuffix, txtsuffix)) { 282 if (casefolding == 0) mgq_ask(".set casefold off"); 283 else if (casefolding > 0) mgq_ask(".set casefold on"); 284 if (stemming == 0) mgq_ask(".set stem off"); 285 else if (stemming > 0) mgq_ask(".set stem on"); 286 287 } else 
databaseloaded = false; 288 289 // free up the c strings 290 delete ccollection; 291 delete idxsuffix; 292 delete txtsuffix; 293 delete ccollectdir; 294 295 return databaseloaded; 296 } 297 298 299 // stem word uses the values set in the last call to makeindexcurrent 300 // to stem the word. It is assumed that word is in unicode 301 text_t mgsearchclass::stemword (const text_t &word) { 302 return to_uni (mgsearch_stemword (to_utf8 (word))); 303 } 304 217 305 218 306 bool mgsearchclass::search(const queryparamclass &queryparams, 219 queryresultsclass &queryresults) 220 { 221 bool databaseloaded = true; 222 307 queryresultsclass &queryresults) { 223 308 assert (cache != NULL); 224 309 … … 226 311 227 312 // first check the cache 228 if (cache->find(queryparams, queryresults)) 229 return true; 313 if (cache->find(queryparams, queryresults)) return true; 230 314 231 315 // make sure there is a query to be processed … … 244 328 casefold = queryparams.casefolding; 245 329 246 // get the names of the collection, index and text suffixes 247 char *ccollection = queryparams.collection.getcstr(); 248 assert (ccollection != NULL); 249 char *idxsuffix = (getindexsuffix (queryparams.collection, 250 queryparams.search_index)).getcstr(); 251 assert (idxsuffix != NULL); 252 char *txtsuffix = (getindexsuffix (queryparams.collection, "text")).getcstr(); 253 assert (txtsuffix != NULL); 254 255 #ifdef __WIN32__ 256 char *ccollectdir = (collectdir+"\\").getcstr(); assert (ccollectdir != NULL); 257 #else 258 char *ccollectdir = collectdir.getcstr(); assert (ccollectdir != NULL); 259 #endif 260 261 if (load_database(ccollection, ccollectdir, idxsuffix, txtsuffix)) 262 { 263 setsearchmode (queryparams); 264 submitquery (queryparams); 265 getresults (queryresults); 266 } 267 else databaseloaded = false; 268 269 // free up the c strings 270 delete ccollection; 271 delete idxsuffix; 272 delete txtsuffix; 273 delete ccollectdir; 274 275 return databaseloaded; 330 if (makeindexcurrent 
(queryparams.search_index, queryparams.collection)) { 331 setsearchmode (queryparams); 332 submitquery (queryparams); 333 getresults (queryresults); 334 return true; 335 } 336 337 return false; 276 338 } 277 339 … … 280 342 { 281 343 mgq_ask(".set expert true"); 344 mgq_ask(".set sorted_terms true"); 282 345 mgq_ask(".set accumulator_method list"); 283 346 mgq_ask(".set max_accumulators 50000"); … … 353 416 mgq_results(result_termfreqs, 0, MAXNUMTERMS, 354 417 termfreqcallback, (void *)(&queryresults)); 418 queryresults.sortuniqqueryterms(); 419 420 // get term variants 355 421 mgq_results(result_terms, 0, MAXNUMTERMS, 356 termscallback, (void *)(&queryresults)); 357 queryresults.sortqueryterms(); 358 queryresults.uniqqueryterms(); 422 termvariantscallback, (void *)(&queryresults)); 359 423 } 360 424
Note: See TracChangeset for help on using the changeset viewer.