/**************************************************************************
 *
 * IndexData.cpp -- Information needed for querying
 * Copyright (C) 1999 Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#include "IndexData.h"
#include "string.h"
#include "mg_files.h"


// Constructs an empty IndexData: no base path or filename recorded,
// every file handle NULL and the current level number set to 0.
IndexData::IndexData () {
  basePath[0] = '\0';
  filename[0] = '\0';

  dictFile = NULL;
  /* [JFG - Mar 06: Accent folding patch] */
  // stem methods run from STEM_MIN to STEM_MAX and are stored in
  // slot (method - 1) of stemFile
  for (int i = STEM_MIN; i <= STEM_MAX; i++)
    stemFile[i-1] = NULL;
  invfFile = NULL;
  approxWeightsFile = NULL;
  exactWeightsFile = NULL;

  curLevelNum = 0;
}


// Closes any open files and releases the current level via UnloadData.
IndexData::~IndexData () {
  UnloadData ();
}


// Opens the index files for (_basePath, _filename) and reads their headers.
//
// Returns true when the data is loaded, or when the same base path and
// filename are already recorded as loaded.  Returns false when either
// name is NULL, when _filename is empty, or when a dictionary or stem
// index header / block index cannot be read.
//
// Stem index files are optional: a stem file that cannot be opened is
// skipped and its slot in stemFile is left NULL.
bool IndexData::LoadData (const char *_basePath, const char *_filename) {
  // reject missing names up front; the original dereferenced them unchecked
  if (_basePath == NULL || _filename == NULL) return false;
  if (_filename[0] == '\0') return false;

  // make sure this data has not already been loaded
  if (strcmp (_basePath, basePath) == 0 &&
      strcmp (_filename, filename) == 0) return true;

  // make sure there is nothing else loaded
  UnloadData ();

  // NOTE(review): unbounded copies -- the sizes of basePath and filename
  // are declared outside this file; confirm callers cannot exceed them.
  strcpy (basePath, _basePath);
  strcpy (filename, _filename);

  set_basepath (basePath);

  // load in the level information
  // (the result of open_file is used unchecked here; presumably MG_ABORT
  // means a failed open does not return -- confirm in mg_files)
  FILE *ivfLevelFile = open_file (filename, INVF_LEVEL_SUFFIX, "rb",
                                  MAGIC_INVF_LEVELS, MG_ABORT);
  levels.Read (ivfLevelFile);
  fclose (ivfLevelFile);

  // blocked dictionary
  dictFile = open_file (filename, INVF_DICT_BLOCKED_SUFFIX, "rb",
                        MAGIC_STEM, MG_ABORT);
  if (!bdh.Read (dictFile)) {
    UnloadData ();
    return false;
  }

  // block index for the word dictionary
  fseek (dictFile, bdh.wblk_idx_start, SEEK_SET);
  if (!ReadBlockIdx (dictFile, biWords)) {
    UnloadData ();
    return false;
  }

  // block index for the tag dictionary
  fseek (dictFile, bdh.tblk_idx_start, SEEK_SET);
  if (!ReadBlockIdx (dictFile, biTags)) {
    UnloadData ();
    return false;
  }

  /* [JFG - Mar 06: Accent folding patch] */
  // read stem indexes
  // [KJD - optional stemming patch]
  // allow no stem indexes
  for (int stem = STEM_MIN; stem <= STEM_MAX; stem++) {
    const int slot = stem - 1;
    char *suffix = make_suffix (INVF_DICT_BLOCKED_SUFFIX_PAT, stem, NULL);
    stemFile[slot] = open_file (filename, suffix, "rb",
                                MAGIC_STEM_GEN(stem + '0'), MG_MESSAGE);

    // a stem index that could not be opened is simply skipped
    if (stemFile[slot] == NULL) continue;

    // NOTE(review): the two failure paths below close only the offending
    // stem file and return false without UnloadData() (that call is
    // commented out in the original).  basePath/filename and dictFile
    // stay set, so a retry with the same names reports "already loaded".
    // Confirm whether that is intended.
    if (!sih[slot].Read (stemFile[slot])) {
      fclose (stemFile[slot]);
      stemFile[slot] = NULL;
      //UnloadData ();
      return false;
    }

    fseek (stemFile[slot], sih[slot].block_idx_start, SEEK_SET);
    if (!ReadBlockIdx (stemFile[slot], sii[slot])) {
      fclose (stemFile[slot]);
      stemFile[slot] = NULL;
      //UnloadData ();
      return false;
    }
  }

  // inverted file
  invfFile = open_file (filename, INVF_SUFFIX, "rb",
                        MAGIC_INVF, MG_ABORT);
  ifh.Read (invfFile);

  // weights
  approxWeightsFile = open_file (filename, APPROX_WEIGHTS_SUFFIX, "rb",
                                 MAGIC_WGHT_APPROX, MG_ABORT);
  exactWeightsFile = open_file (filename, WEIGHTS_SUFFIX, "rb",
                                MAGIC_WGHT, MG_ABORT);

  return true;
}


// Forgets the recorded base path and filename, closes every file that is
// open, and unloads the current level.  Always returns true.
bool IndexData::UnloadData () {
  basePath[0] = '\0';
  filename[0] = '\0';

  if (dictFile != NULL) {
    fclose (dictFile);
    dictFile = NULL;
  }

  for (int i = STEM_MIN; i <= STEM_MAX; i++) {
    if (stemFile[i-1] != NULL) {
      fclose (stemFile[i-1]);
      stemFile[i-1] = NULL;
    }
  }

  if (invfFile != NULL) {
    fclose (invfFile);
    invfFile = NULL;
  }

  if (approxWeightsFile != NULL) {
    fclose (approxWeightsFile);
    approxWeightsFile = NULL;
  }

  if (exactWeightsFile != NULL) {
    fclose (exactWeightsFile);
    exactWeightsFile = NULL;
  }

  UnloadLevel();

  return true;
}


// Makes `level` the current level: reads its level conversion data and
// approximate weights and records its position within levels.levelInfo
// in curLevelNum.
//
// Returns true if the level is (or already was) loaded.  Returns false if
// the dictionary, inverted file or weights files are not open, if the
// level has no entry in levels.levelInfo, if its tag is not found in the
// dictionary, or if either read fails.  On failure no level is current.
bool IndexData::LoadLevel (const UCArray &level) {
  // make sure this level is not already loaded
  if (level == curLevel) return true;

  // unload any levels currently loaded
  UnloadLevel();

  // make sure the required files are open
  if (dictFile == NULL || invfFile == NULL ||
      approxWeightsFile == NULL || exactWeightsFile == NULL)
    return false;

  // Look the level up with find() rather than operator[]: operator[]
  // would insert a default entry for an unknown level, so the lookup
  // could never fail and the weights would be read using that
  // default-constructed entry.
  const IvfLevelInfoMap::const_iterator levelEnd = levels.levelInfo.end();
  const IvfLevelInfoMap::const_iterator levelPos = levels.levelInfo.find (level);
  if (levelPos == levelEnd) return false;

  // read in the information from the dictionary
  block_dict_el tagEl;
  unsigned long tagElNum;
  if (!SearchBlockDictEl (dictFile, biTags, bdh.entries_per_tblk,
                          bdh.tag_dict_size, level, tagEl, tagElNum))
    return false;

  // read in the level conversion information
  if (!levelConverter.Read (invfFile, tagEl.invf_ptr,
                            bdh.num_frags, tagEl.frag_occur))
    return false;

  // read in the approximate weights
  if (!weightData.Read (approxWeightsFile,
                        (*levelPos).second.approxWeightsDiskPtr,
                        (*levelPos).second.numEntries))
    return false;

  // the level number is the position of the level in iteration order
  curLevelNum = 0;
  for (IvfLevelInfoMap::const_iterator levelHere = levels.levelInfo.begin();
       levelHere != levelPos; ++levelHere)
    ++curLevelNum;

  curLevel = level;

  return true;
}


// Clears the current level name, resets the level number to 0 and frees
// the loaded weight data.  Always returns true.
bool IndexData::UnloadLevel () {
  curLevel.erase (curLevel.begin(), curLevel.end());
  curLevelNum = 0;
  weightData.Free ();

  return true;
}