Changeset 13477 for trunk/indexers/mgpp/text/Terms.cpp
- Timestamp:
- 2006-12-11T11:22:20+13:00 (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/indexers/mgpp/text/Terms.cpp
r8692 r13477 210 210 vector<unsigned long> &equivWords) { 211 211 equivWords.erase (equivWords.begin(), equivWords.end()); 212 213 if (stemMethod == 0 || stemMethod==4 || stemMethod==5) { 212 213 // if the stem method specified is not a valid one (i.e. there was no appropriate stem index), then we set it to 0 214 // unless we have partial matching, in which case we are not doing stem indexes anyway. 215 if (!(stemMethod & STEM_PARTIAL_MATCH) && indexData.stemFile[stemMethod-1] == NULL) { 216 cerr << "Stem index for method "<<stemMethod<< " was not built, so not doing stemming\n"; 217 stemMethod = 0; 218 } 219 /* [JFG - Mar 06: Accent folding patch] */ 220 /* use flag PARTIAL_MATCH */ 221 if (stemMethod == 0 || (stemMethod & STEM_PARTIAL_MATCH)) { 214 222 // don't need to stem the word, 215 223 // find the word number(s) for this term … … 218 226 word_block_dict_el wordDictEl; 219 227 wordDictEl.SetNumLevels (numLevels); 220 if (stemMethod == 0) {228 if (stemMethod == 0) { 221 229 if (SearchWordBlockDictEl (indexData.dictFile, indexData.biWords, 222 230 indexData.bdh.entries_per_wblk, … … 228 236 } else { 229 237 // partial matching, 230 PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords, (stemMethod==5?true:false) ); 238 PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords, (stemMethod & STEM_CaseFolding)? true : false); 239 // TODO: Accent Folding is not handled here!! 
231 240 return; 232 241 } … … 234 243 235 244 // need to stem this word and find it in the blocked stem index 236 237 unsigned char mgWord[MAXSTEMLEN + 1]; 245 unsigned char mgWord[MAXSTEMLEN + 1]; 238 246 UCArray stemTerm; 239 247 unsigned long stemmerNum = 0; 240 if (stemMethod == 1) stemmerNum = indexData.sih1.stemmer_num; 241 else if (stemMethod == 2) stemmerNum = indexData.sih2.stemmer_num; 242 else if (stemMethod == 3) stemmerNum = indexData.sih3.stemmer_num; 243 248 249 /* [JFG - Mar 06: Accent folding patch] */ 250 if(stemMethod > STEM_MAX) { 251 return; 252 //TODO: throw an error here 253 } 254 255 stemmerNum = indexData.sih[stemMethod-1].stemmer_num; 256 244 257 // convert the word to an "mg word" 245 258 mgWord[0] = term.size(); … … 247 260 248 261 // stem the word 249 stemmer (stemMethod, stemmerNum, mgWord); 250 262 mgpp_stemmer (stemMethod, stemmerNum, mgWord); 251 263 // convert the result back to a UCArray 252 264 stemTerm.insert (stemTerm.end(), &mgWord[1], &mgWord[1] + mgWord[0]); … … 256 268 unsigned long stemElNum; 257 269 bool result = false; 258 if (stemMethod == 1) { 259 result = SearchStemBlockDictEl (indexData.stem1File, 260 indexData.sii1, 261 indexData.sih1.entries_per_block, 262 indexData.sih1.dict_size, 270 271 /* [JFG - Mar 06: Accent folding patch] */ 272 result = SearchStemBlockDictEl (indexData.stemFile[stemMethod-1], 273 indexData.sii[stemMethod-1], 274 indexData.sih[stemMethod-1].entries_per_block, 275 indexData.sih[stemMethod-1].dict_size, 263 276 stemTerm, 264 277 stemDictEl, 265 278 stemElNum); 266 267 } else if (stemMethod == 2) { 268 result = SearchStemBlockDictEl (indexData.stem2File, 269 indexData.sii2, 270 indexData.sih2.entries_per_block, 271 indexData.sih2.dict_size, 272 stemTerm, 273 stemDictEl, 274 stemElNum); 275 276 } else if (stemMethod == 3) { 277 result = SearchStemBlockDictEl (indexData.stem3File, 278 indexData.sii3, 279 indexData.sih3.entries_per_block, 280 indexData.sih3.dict_size, 281 stemTerm, 282 stemDictEl, 
283 stemElNum); 284 } 285 279 286 280 if (result) { 287 281 equivWords = stemDictEl.equivWords;
Note:
See TracChangeset
for help on using the changeset viewer.