Changeset 22923
- Timestamp: 2010-09-20T10:52:00+12:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/runtime-src/src/recpt/summarise.cpp
r20805 r22923 33 33 #include <string.h> 34 34 35 #include <iostream> 36 using namespace std; 35 37 36 38 /* **************** LOCAL PROTOTYPES **************** */ … … 99 101 *****************************************************/ 100 102 101 text_t summarise_keywords(text_t &htmlstr, text_t &query, int summaryLength) { 103 text_t summarise_keywords(text_t &htmlstr, text_t &query, int summaryLength) 104 { 102 105 103 106 if ((query.size()==0) || (htmlstr.size()==0)) { … … 109 112 110 113 // consider only non-empty terms 111 for (text_tarray::iterator term = allterms.begin();114 for (text_tarray::iterator term = allterms.begin(); 112 115 term < allterms.end(); ++term) { 113 if (!(*term).empty())116 if (!(*term).empty()) 114 117 terms.push_back(*term); 115 118 } … … 130 133 // answers[0] contains sentences with 1 keyword 131 134 // answers[1] contains sentences with 2 keywords, etc. 135 132 136 vector<int> answersSize(terms.size()); 133 137 // answersSize[0] is the combined size of sentences with 1 keyword, etc. 134 138 for(vector<int>::iterator size = answersSize.begin(); 135 size<answersSize.end(); ++size) 139 size<answersSize.end(); ++size) { 136 140 *size = 0; // initialise sentence size 141 } 137 142 138 143 int totfound = 0; 139 144 text_t::iterator str_current = str_start; 140 while (str_current<str_end && answersSize[terms.size()-1]<summaryLength) {145 while (str_current<str_end && answersSize[terms.size()-1]<summaryLength) { 141 146 // if the size of best sentences is greater than summary, that's enough! 
142 147 text_t sentence = next_sentence(str_current,str_end); … … 144 149 text_tarray::iterator terms_current = terms_start; 145 150 int nFound = 0; 146 while (terms_current!=terms_end) {151 while (terms_current!=terms_end) { 147 152 text_t::iterator word = findword(sentence.begin(),sentence.end(), 148 153 *terms_current); 149 if(word!=sentence.end()) 150 { ++nFound; ++totfound; } 154 if (word!=sentence.end()) { 155 ++nFound; 156 ++totfound; 157 } 151 158 ++terms_current; 152 159 } 153 160 154 if (nFound>0 && answersSize[nFound-1]<summaryLength) {161 if (nFound>0 && answersSize[nFound-1]<summaryLength) { 155 162 answers[nFound-1].push_back(sentence); 156 163 answersSize[nFound-1] += sentence.size(); … … 159 166 160 167 text_t answer; 161 for(vector<text_tarray>::iterator sentarray = answers.end()-1; 162 sentarray>=answers.begin(); --sentarray) 163 for(text_tarray::iterator sentence = (*sentarray).begin(); 168 169 // Changed to using reverse iterator, as there is some concern as to 170 // whether the operations encoded with the usual iterator -- e.g. 171 // answers.end()-1 and so forth -- are safe. Certainly the code 172 // works out tidier using the reverse iterator and the segmentation 173 // fault that was occurring in this block went away 174 175 for (vector<text_tarray>::reverse_iterator sentarray = answers.rbegin(); 176 sentarray<answers.rend(); ++sentarray) { 177 for (text_tarray::iterator sentence = (*sentarray).begin(); 164 178 sentence < (*sentarray).end(); ++sentence) { 165 179 answer.append(*sentence); 166 if(answer.size()>=summaryLength) 180 181 if(answer.size()>=summaryLength) { 167 182 return answer; 168 } 169 170 if(!answer.empty()) 183 } 184 } 185 } 186 187 if (!answer.empty()) { 171 188 return answer; 189 } 172 190 173 191 return summarise_startend(htmlstr,summaryLength);
Note: See TracChangeset for help on using the changeset viewer.