Changeset 2146
- Timestamp: 2001-03-09T16:56:35+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/src/colservr/phrasesearch.cpp
r2139 r2146 78 78 } 79 79 80 static void get_all_docnums (gdbmclass &gdbm, text_t OID, vector<int> &docnum_list) { 81 82 infodbclass OID_info; 83 84 // get OID 85 if (!gdbm.getinfo (OID, OID_info)) return; 86 if (OID_info["hastxt"] == "1" && !OID_info["docnum"].empty()) { 87 docnum_list.push_back (OID_info["docnum"].getint()); 88 } 89 90 // get contents set 91 if (OID_info["contains"].empty()) return; 92 text_tarray contains; text_t tmptext; 93 text_t::iterator contains_here = OID_info["contains"].begin(); 94 text_t::iterator contains_end = OID_info["contains"].end(); 95 while (contains_here != contains_end) { 96 if (*contains_here == '"') tmptext += OID; 97 else if (*contains_here == ';') { 98 if (!tmptext.empty()) contains.push_back (tmptext); 99 tmptext.clear(); 100 } else tmptext.push_back(*contains_here); 101 contains_here++; 102 } 103 if (!tmptext.empty()) contains.push_back (tmptext); 104 105 text_tarray::const_iterator here = contains.begin(); 106 text_tarray::const_iterator end = contains.end(); 107 while (here != end) { 108 get_all_docnums (gdbm, *here, docnum_list); 109 here ++; 110 } 111 } 80 112 81 113 bool doc_phrase_search (unsigned char *doc, int doclen, … … 102 134 while (doc_here <= doc_end) { 103 135 first = true; 104 136 105 137 // there will be at least one member of phrase (see above) 106 138 termfreqclassarray::const_iterator phrase_here = phrase.begin(); … … 126 158 return false; 127 159 } 128 129 160 130 161 // looks for the stemmed phrase in the metadata or text associated with … … 139 170 const termfreqclassarray &phrase, 140 171 int docnum) { 141 // disect the long index to find out where the text should come from142 text_t level, gran;143 text_t::const_iterator longindex_here = longindex.begin();144 text_t::const_iterator longindex_end = longindex.end();145 longindex_here = getdelimitstr (longindex_here, longindex_end, ':', level);146 longindex_here = getdelimitstr (longindex_here, longindex_end, ':', gran);147 148 if (gran.empty()) 
return false;149 150 // note that we're treating indexes with granularity of 'all' (i.e. text,Title,Creator)151 // as if they were simply 'text' indexes152 if (gran == "text" || gran == "all" || findword(gran.begin(),gran.end(),"text")) {153 char *doc = NULL;154 int doclen = 0;155 156 // get text from mg.157 if (!mgsearch.mgdocument (index, subcollection, language, collection,158 docnum, doc, doclen)) return false;159 return doc_phrase_search ((unsigned char *)doc, doclen, phrase);160 }161 172 162 173 // get OID 163 char *metadata = NULL;164 text_t::size_type metadata_len = 0;165 174 infodbclass docnum_info; 166 infodbclass OID_info;167 168 175 if (!gdbm.getinfo (docnum, docnum_info)) return false; 169 176 text_t &OID = docnum_info["section"]; 170 177 if (OID.empty()) return false; 178 179 // disect the long index to find out where the text should come from 180 text_t gran, type; 181 text_t::const_iterator longindex_here = longindex.begin(); 182 text_t::const_iterator longindex_end = longindex.end(); 183 longindex_here = getdelimitstr (longindex_here, longindex_end, ':', gran); 184 longindex_here = getdelimitstr (longindex_here, longindex_end, ':', type); 185 186 if (gran.empty()) return false; 187 188 // note that we're treating indexes of type 'all' (i.e. text,Title,Creator) 189 // or other composite indexes that contain "text" as if they were simply 'text' indexes 190 if (type == "text" || type == "all" || findword(type.begin(),type.end(),"text")) { 191 char *doc = NULL; 192 int doclen = 0; 193 194 // get text from mg. 195 if (gran == "document") { 196 197 // if this is a document level index (which should only happen if 198 // there are no matching indexes with a finer granularity -- see 199 // mgqueryfilterclass::mg_parse_query_params) then we must do the 200 // phrase search on the entire document (i.e. 
all the sections) 201 // -- this is going to make a slow process even slower 202 vector<int> docnum_list; text_t fulldoc; 203 get_all_docnums (gdbm, OID, docnum_list); 204 vector<int>::const_iterator this_docnum = docnum_list.begin(); 205 vector<int>::const_iterator end_docnum = docnum_list.end(); 206 while (this_docnum != end_docnum) { 207 if (mgsearch.mgdocument (index, subcollection, language, collection, 208 *this_docnum, doc, doclen)) { 209 fulldoc.appendcstr (doc); 210 } 211 this_docnum ++; 212 } 213 doc = fulldoc.getcstr(); 214 doclen = fulldoc.size(); 215 bool rv = doc_phrase_search ((unsigned char *)doc, doclen, phrase); 216 delete doc; 217 return rv; 218 219 } else { 220 221 if (!mgsearch.mgdocument (index, subcollection, language, collection, 222 docnum, doc, doclen)) return false; 223 return doc_phrase_search ((unsigned char *)doc, doclen, phrase); 224 } 225 } 226 227 char *metadata = NULL; 228 text_t::size_type metadata_len = 0; 229 infodbclass OID_info; 171 230 172 231 // get field … … 174 233 175 234 bool result = false; 176 text_tarray *tarr_ptr = OID_info.getmultinfo ( gran);235 text_tarray *tarr_ptr = OID_info.getmultinfo (type); 177 236 if (tarr_ptr != NULL ) { 178 237 text_tarray::const_iterator subvalue_here = (*tarr_ptr).begin();
Note: See TracChangeset for help on using the changeset viewer.