Changeset 12887
- Timestamp: 2006-09-28T11:03:42+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/Terms.cpp
r8691 r12887 210 210 vector<unsigned long> &equivWords) { 211 211 equivWords.erase (equivWords.begin(), equivWords.end()); 212 213 if (stemMethod == 0 || stemMethod==4 || stemMethod==5) { 212 213 /* [JFG - Mar 06: Accent folding patch] */ 214 /* use flag PARTIAL_MATCH */ 215 if (stemMethod == 0 || (stemMethod & STEM_PARTIAL_MATCH)) { 214 216 // don't need to stem the word, 215 217 // find the word number(s) for this term … … 218 220 word_block_dict_el wordDictEl; 219 221 wordDictEl.SetNumLevels (numLevels); 220 if (stemMethod == 0) {222 if (stemMethod == 0) { 221 223 if (SearchWordBlockDictEl (indexData.dictFile, indexData.biWords, 222 224 indexData.bdh.entries_per_wblk, … … 228 230 } else { 229 231 // partial matching, 230 PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords, (stemMethod==5?true:false) ); 232 PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords, 233 (stemMethod & STEM_CaseFolding)? true : false); 234 // TODO: Accent Folding is not handled here!! 
231 235 return; 232 236 } … … 234 238 235 239 // need to stem this word and find it in the blocked stem index 236 237 240 unsigned char mgWord[MAXSTEMLEN + 1]; 238 241 UCArray stemTerm; 239 242 unsigned long stemmerNum = 0; 240 if (stemMethod == 1) stemmerNum = indexData.sih1.stemmer_num; 241 else if (stemMethod == 2) stemmerNum = indexData.sih2.stemmer_num; 242 else if (stemMethod == 3) stemmerNum = indexData.sih3.stemmer_num; 243 243 244 /* [JFG - Mar 06: Accent folding patch] */ 245 if(stemMethod > STEM_MAX) { 246 return; 247 //TODO: throw an error here 248 } 249 stemmerNum = indexData.sih[stemMethod-1].stemmer_num; 250 244 251 // convert the word to an "mg word" 245 252 mgWord[0] = term.size(); … … 247 254 248 255 // stem the word 249 stemmer (stemMethod, stemmerNum, mgWord); 250 256 mgpp_stemmer (stemMethod, stemmerNum, mgWord); 251 257 // convert the result back to a UCArray 252 258 stemTerm.insert (stemTerm.end(), &mgWord[1], &mgWord[1] + mgWord[0]); … … 256 262 unsigned long stemElNum; 257 263 bool result = false; 258 if (stemMethod == 1) { 259 result = SearchStemBlockDictEl (indexData.stem1File, 260 indexData.sii1, 261 indexData.sih1.entries_per_block, 262 indexData.sih1.dict_size, 264 265 /* [JFG - Mar 06: Accent folding patch] */ 266 result = SearchStemBlockDictEl (indexData.stemFile[stemMethod-1], 267 indexData.sii[stemMethod-1], 268 indexData.sih[stemMethod-1].entries_per_block, 269 indexData.sih[stemMethod-1].dict_size, 263 270 stemTerm, 264 271 stemDictEl, 265 272 stemElNum); 266 267 } else if (stemMethod == 2) { 268 result = SearchStemBlockDictEl (indexData.stem2File, 269 indexData.sii2, 270 indexData.sih2.entries_per_block, 271 indexData.sih2.dict_size, 272 stemTerm, 273 stemDictEl, 274 stemElNum); 275 276 } else if (stemMethod == 3) { 277 result = SearchStemBlockDictEl (indexData.stem3File, 278 indexData.sii3, 279 indexData.sih3.entries_per_block, 280 indexData.sih3.dict_size, 281 stemTerm, 282 stemDictEl, 283 stemElNum); 284 } 285 273 286 274 if 
(result) { 287 275 equivWords = stemDictEl.equivWords;
Note: See TracChangeset for help on using the changeset viewer.