Changeset 36864
- Timestamp:
- 2022-10-24T08:33:26+13:00 (18 months ago)
- Location:
- gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBDocInfo.java
r35177 r36864 61 61 } 62 62 63 public void incTopRank(double inc_rank_val) 64 { 65 double top_rank = rankVector_.get(0); 66 67 double new_top_rank = top_rank + inc_rank_val; 68 rankVector_.set(0,new_top_rank); 69 } 70 63 71 64 72 public String getOffsetList() -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java
r36863 r36864 44 44 45 45 /** the query result, filled in by runQuery */ 46 protected Vector query_result _;46 protected Vector query_results_; 47 47 48 48 protected int offset_ = 100; … … 60 60 61 61 public WekaDBWrapper() { 62 query_result _ = null;62 query_results_ = null; 63 63 } 64 64 … … 104 104 if (first_entry) { 105 105 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id,rankVector,offsetVector); 106 query_result _.add(wekaDB_doc_info);106 query_results_.add(wekaDB_doc_info); 107 107 first_entry = false; 108 108 } … … 112 112 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id,rank,offset); 113 113 114 query_result _.add(wekaDB_doc_info);114 query_results_.add(wekaDB_doc_info); 115 115 } 116 116 … … 119 119 120 120 121 protected int mergeResultDoc(Vector query_results, WekaDBDocInfo new_doc_info, double inc_rank_val) 122 { 123 int merged = 0; 124 125 String new_doc_id = new_doc_info.getDocID(); 126 127 final int query_results_len = query_results.size(); 128 129 for (int i=0; i<query_results_len; i++) { 130 WekaDBDocInfo existing_doc_info = (WekaDBDocInfo)query_results.get(i); 131 132 String existing_doc_id = existing_doc_info.getDocID(); 133 if (new_doc_id.equals(existing_doc_id)) { 134 merged = 1; 135 existing_doc_info.incTopRank(inc_rank_val); 136 break; 137 } 138 } 139 140 if (merged == 0) { 141 query_results.add(new_doc_info); 142 } 143 144 return merged; 145 } 146 121 147 /** actually carry out the query. 122 148 Use the set methods to set query results. 123 Writes the result to query_result .149 Writes the result to query_results. 124 150 * - maintains state between requests as can be slow 125 151 * base_dir and index_path should join together to provide … … 172 198 173 199 174 Vector expanded_query_result = new Vector();200 Vector expanded_query_results = new Vector(); 175 201 176 202 int nearest_instances_len = nearest_instances.size(); … … 179 205 180 206 double pos_penalty = 0.1; 207 int topup_count = 0; 181 208 182 209 for (int ei=0; ei<clamped_expanded_k_nearest_num; ei++) { … … 186 213 String matching_doc_id_segment = instance.stringValue(0); 187 214 188 189 215 //Pattern p = Pattern.compile("^(\\w+)-(\\d+)$"); 190 216 Matcher m = doc_seg_re.matcher(matching_doc_id_segment); … … 193 219 String matching_doc_id = m.group(1); 194 220 int end_of_matching_segment_offset = Integer.parseInt(m.group(2)); 195 int matching_segment_offset = end_of_matching_segment_offset - (int)AV_SEGMENT_LENGTH_SECS; 221 //int matching_segment_offset = end_of_matching_segment_offset - (int)AV_SEGMENT_LENGTH_SECS; 222 int matching_segment_offset = end_of_matching_segment_offset; 196 223 197 224 if (matching_doc_id.equals(doc_id)) { 225 // don't add in matches that come from a matching segment in the query doc 198 226 continue; 199 227 } … … 208 236 logger.info("\tAdding in: matching_doc_id = " + matching_doc_id); 209 237 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset); 210 expanded_query_result.add(wekaDB_doc_info); 238 239 //expanded_query_results.add(wekaDB_doc_info); 240 241 double inc_rank_val = matching_rank / (double)(topup_count+2); // starts to a 50% (/2) weighting when topup_count == 0 242 int merged = mergeResultDoc(expanded_query_results,wekaDB_doc_info,inc_rank_val); 243 244 topup_count += merged; 245 246 if ((expanded_query_results.size() > k_nearest_num) && (topup_count > k_nearest_num)) { 247 // guard to stop multiple recurring matches in the same doc dominationg the rank_val 248 break; 249 } 211 250 } 212 251 else { … … 214 253 } 215 254 } 216 217 query_result_ = new Vector(); 255 256 Collections.sort(expanded_query_results); 257 258 query_results_ = new Vector(); 218 259 219 260 int i = 0; 220 261 while (i < k_nearest_num) { 221 if (i >= expanded_query_result .size()) {262 if (i >= expanded_query_results.size()) { 222 263 break; 223 264 } 224 265 225 query_result _.add(expanded_query_result.get(i));266 query_results_.add(expanded_query_results.get(i)); 226 267 i++; 227 268 } 228 269 229 Collections.sort(query_result_);270 //Collections.sort(query_results_); 230 271 } 231 272 … … 265 306 BufferedReader wbr = new BufferedReader(wisr); 266 307 267 query_result _ = new Vector();308 query_results_ = new Vector(); 268 309 269 310 boolean first_entry = true; … … 335 376 wbr.close(); 336 377 337 // sort query_result _ on 'rank' field378 // sort query_results_ on 'rank' field 338 379 // note: compareTo() method impelemented to sort into descending order 339 380 340 Collections.sort(query_result _);381 Collections.sort(query_results_); 341 382 342 383 … … 353 394 public Vector getQueryResult() 354 395 { 355 return query_result _;396 return query_results_; 356 397 } 357 398 }
Note:
See TracChangeset
for help on using the changeset viewer.