Changeset 36964
- Timestamp: 2022-11-30T17:22:16+13:00 (12 months ago)
- Location: gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java
r36859 r36964 150 150 151 151 String doc_id = wdb_doc.getDocID(); 152 153 double arousal = wdb_doc.getTopArousal(); 154 double valence = wdb_doc.getTopValence(); 155 152 156 double rank = wdb_doc.getTopRank(); 153 157 String offsets = wdb_doc.getOffsetList(); 154 158 155 Element doc_node = createDocNode (result_doc, doc_id, Double.toString (rank)); 159 Element doc_node = createDocNode(result_doc, doc_id, Double.toString(rank)); 160 doc_node.setAttribute("arousalVal", Double.toString(arousal)); 161 doc_node.setAttribute("valenceVal", Double.toString(valence)); 156 162 doc_node.setAttribute("frameOffset", offsets); 157 163 -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBDocInfo.java
r36864 r36964 29 29 { 30 30 public String oid_; 31 public Vector<Double> arousalVector_; 32 public Vector<Double> valenceVector_; 31 33 public Vector<Double> rankVector_; 32 34 public Vector<Integer> offsetVector_; 33 35 34 public WekaDBDocInfo(String doc_oid, double rank, int offset)36 public WekaDBDocInfo(String doc_oid, double arousal, double valence, double rank, int offset) 35 37 { 36 38 oid_ = doc_oid; 37 39 40 arousalVector_ = new Vector<Double>(); 41 arousalVector_.add (arousal); 42 valenceVector_ = new Vector<Double>(); 43 valenceVector_.add (valence); 44 38 45 rankVector_ = new Vector<Double>(); 39 46 rankVector_.add (rank); … … 44 51 45 52 46 public WekaDBDocInfo(String doc_oid, Vector<Double> rankVector, Vector<Integer> offsetVector) 53 public WekaDBDocInfo(String doc_oid, Vector<Double> arousalVector, Vector<Double> valenceVector, 54 Vector<Double> rankVector, Vector<Integer> offsetVector) 47 55 { 48 56 oid_ = doc_oid; 49 rankVector_ = rankVector; 57 58 arousalVector_ = arousalVector; 59 valenceVector_ = valenceVector; 60 61 rankVector_ = rankVector; 50 62 offsetVector_ = offsetVector; 51 63 } … … 54 66 { 55 67 return oid_; 68 } 69 70 public double getTopArousal() 71 { 72 return arousalVector_.get(0); 73 } 74 75 public double getTopValence() 76 { 77 return valenceVector_.get(0); 56 78 } 57 79 -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java
r36864 r36964 98 98 99 99 100 protected boolean addQueryResult(boolean first_entry, String doc_id, 100 protected boolean addQueryResult(boolean first_entry, String doc_id, 101 Vector<Double> arousalVector, Vector<Double> valenceVector, 101 102 Vector<Double> rankVector, Vector<Integer> offsetVector) 102 103 { 103 104 104 105 if (first_entry) { 105 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id, rankVector,offsetVector);106 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id,arousalVector,valenceVector,rankVector,offsetVector); 106 107 query_results_.add(wekaDB_doc_info); 107 108 first_entry = false; 108 109 } 109 110 else { 110 double rank = rankVector.get(0); 111 int offset = offsetVector.get(0); 112 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id,rank,offset); 111 double arousal = arousalVector.get(0); 112 double valence = valenceVector.get(0); 113 114 double rank = rankVector.get(0); 115 int offset = offsetVector.get(0); 116 117 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(doc_id,arousal,valence,rank,offset); 113 118 114 119 query_results_.add(wekaDB_doc_info); … … 150 155 * - maintains state between requests as can be slow 151 156 * base_dir and index_path should join together to provide 152 * the absolute location of the mg index files eg ..../index/dtx/demo157 * the absolute location of the Weka CSV file e.g. 
<col>/index/wekaDB/av-features.csv 153 158 * base_dir must end with a file separator (OS dependant) 154 159 */ … … 156 161 157 162 public void runQuery(String wekaDB_index_dir, String knn_model_file, 158 String assoc_index_dir, String query_string) { 159 160 // combine index_dir with audiodb fileanem 163 String assoc_index_dir, String query_string) 164 { 161 165 162 166 String full_knn_model_filename = wekaDB_index_dir + File.separatorChar + knn_model_file; 163 167 164 //String full_chr12_filename = assoc_index_dir + File.separatorChar 165 // + query_string + File.separatorChar + "doc.chr12"; 166 167 System.err.println("**** full knn model filename = " + full_knn_model_filename); 168 //System.err.println("**** full knn model filename = " + full_knn_model_filename); 168 169 169 170 // Example returned result from Weka KNN … … 186 187 187 188 double query_arousal_val = arousal_; 188 double query_valence_val = 189 double query_valence_val = valence_; 189 190 190 191 int k_nearest_num = max_docs_; … … 202 203 int nearest_instances_len = nearest_instances.size(); 203 204 204 int clamped_expanded_k_nearest_num = Math.min(expanded_k_nearest_num,nearest_instances_len); 205 206 double pos_penalty = 0.1; 207 int topup_count = 0; 205 int clamped_expanded_k_nearest_num = Math.max(nearest_instances_len,k_nearest_num); 206 207 if (clamped_expanded_k_nearest_num > k_nearest_num) { 208 System.err.println("**** expanded number of k-nearest matches = " + clamped_expanded_k_nearest_num); 209 } 208 210 209 211 for (int ei=0; ei<clamped_expanded_k_nearest_num; ei++) { 210 212 Instance instance = nearest_instances.instance(ei); 211 logger.info("\tProcessing returned instance: " + instance);212 213 213 214 String matching_doc_id_segment = instance.stringValue(0); … … 224 225 if (matching_doc_id.equals(doc_id)) { 225 226 // don't add in matches that come from a matching segment in the query doc 227 //logger.info("\tSelf-match with query doc => Skipping: " + instance); 228 
System.err.println("\tSelf-match with query doc => Skipping: " + instance); 229 230 continue; 231 } 232 233 //logger.info("\tAdding returned instance: " + instance); 234 System.err.println("\tAdding returned instance: " + instance); 235 236 double matching_arousal_val = instance.value(1); 237 double matching_valence_val = instance.value(2); 238 239 double matching_diff = (Math.abs(query_arousal_val - matching_arousal_val) 240 + Math.abs(query_valence_val - matching_valence_val))/4.0; 241 double matching_rank = 1.0 - matching_diff; 242 243 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id, 244 matching_arousal_val, matching_valence_val, 245 matching_rank,matching_segment_offset); 246 247 expanded_query_results.add(wekaDB_doc_info); 248 } 249 else { 250 logger.error("Returned AV k-nearest neighbour match '"+matching_doc_id_segment+"' could not be parsed as <doc-id>-<segment>" ); 251 } 252 } 253 254 //Collections.sort(expanded_query_results); 255 256 query_results_ = new Vector(); 257 258 int i = 0; 259 while (i < k_nearest_num) { 260 if (i >= expanded_query_results.size()) { 261 break; 262 } 263 264 query_results_.add(expanded_query_results.get(i)); 265 i++; 266 } 267 268 //Collections.sort(query_results_); 269 } 270 271 272 273 public void runQueryDiffAndMerge(String wekaDB_index_dir, String knn_model_file, 274 String assoc_index_dir, String query_string) 275 { 276 277 // combine index_dir with audiodb fileanem 278 279 String full_knn_model_filename = wekaDB_index_dir + File.separatorChar + knn_model_file; 280 281 //String full_chr12_filename = assoc_index_dir + File.separatorChar 282 // + query_string + File.separatorChar + "doc.chr12"; 283 284 System.err.println("**** full knn model filename = " + full_knn_model_filename); 285 286 // Example returned result from Weka KNN 287 // => first line is the input instance ('filename+segment',Arousal,Valence) 288 // following (indented lines) nearest neighbour matches in same format 289 // 290 // 
ds_22716_5743-6,-0.549489,-0.118439 291 // ds_22716_5743-6,-0.549489,-0.118439 292 // ds_31008_6550-30,-0.549489,-0.118439 293 // ds_72651_26831-6,-0.549489,-0.118439 294 // ds_26196_9214-18,-0.549489,-0.118439 295 296 297 WekaFindInstanceKNN.init(full_knn_model_filename); 298 299 String doc_id = query_string; 300 int segment = offset_; 301 302 String query_doc_id_segment = doc_id + "-" + segment; 303 304 double query_arousal_val = arousal_; 305 double query_valence_val = valence_; 306 307 int k_nearest_num = max_docs_; 308 int expanded_k_nearest_num = max_docs_ * 5; // * internally get more matches, then sift through to arrive at the best 'max_docs_' 309 310 Pattern doc_seg_re = Pattern.compile("^(\\w+)-(\\d+)$"); 311 //Matcher query_doc_seg_match = doc_seq_re.matcher(query_doc_id_segment); 312 313 Instances nearest_instances 314 = WekaFindInstanceKNN.kNearestNeighbours(query_doc_id_segment,query_arousal_val,query_valence_val,k_nearest_num); 315 316 317 Vector expanded_query_results = new Vector(); 318 319 int nearest_instances_len = nearest_instances.size(); 320 321 int clamped_expanded_k_nearest_num = Math.min(expanded_k_nearest_num,nearest_instances_len); 322 323 double pos_penalty = 0.1; 324 int topup_count = 0; 325 326 for (int ei=0; ei<clamped_expanded_k_nearest_num; ei++) { 327 Instance instance = nearest_instances.instance(ei); 328 logger.info("\tProcessing returned instance: " + instance); 329 330 String matching_doc_id_segment = instance.stringValue(0); 331 332 //Pattern p = Pattern.compile("^(\\w+)-(\\d+)$"); 333 Matcher m = doc_seg_re.matcher(matching_doc_id_segment); 334 if (m.matches()) { 335 336 String matching_doc_id = m.group(1); 337 int end_of_matching_segment_offset = Integer.parseInt(m.group(2)); 338 //int matching_segment_offset = end_of_matching_segment_offset - (int)AV_SEGMENT_LENGTH_SECS; 339 int matching_segment_offset = end_of_matching_segment_offset; 340 341 if (matching_doc_id.equals(doc_id)) { 342 // don't add in matches that come from 
a matching segment in the query doc 226 343 continue; 227 344 } … … 235 352 236 353 logger.info("\tAdding in: matching_doc_id = " + matching_doc_id); 237 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset); 354 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id, 355 matching_arousal_val, matching_valence_val, 356 matching_rank,matching_segment_offset); 238 357 239 358 //expanded_query_results.add(wekaDB_doc_info); … … 270 389 //Collections.sort(query_results_); 271 390 } 272 273 public void runQueryOLD(String wekaDB_index_dir, String knn_model_file,274 String assoc_index_dir, String query_string) {275 276 // combine index_dir with audiodb fileanem277 278 String full_knn_model_filename = wekaDB_index_dir + File.separatorChar + knn_model_file;279 String full_chr12_filename = assoc_index_dir + File.separatorChar280 + query_string + File.separatorChar + "doc.chr12";281 282 int num_matches_within_track = 6;283 284 // ****285 String [] cmd_array = new String[] {286 "java", "-jar", "weka.jar",287 "-d", full_knn_model_filename,288 "-Q", "nsequence",289 "-p", String.format("%d",offset_),290 "-n", String.format("%d",num_matches_within_track),291 "-l", String.format("%d",length_),292 "-r", String.format("%d",max_docs_),293 "-f", full_chr12_filename294 };295 296 System.err.println("**** cmd_array = " + String.join(" ", cmd_array));297 298 Runtime runtime = Runtime.getRuntime();299 try {300 Process wekaDB_proc = runtime.exec(cmd_array);301 //int exitVal = wekaDB_proc.waitFor();302 //System.err.println("*** exit status = " + exitVal);303 304 InputStream wis = wekaDB_proc.getInputStream();305 InputStreamReader wisr = new InputStreamReader(wis);306 BufferedReader wbr = new BufferedReader(wisr);307 308 query_results_ = new Vector();309 310 boolean first_entry = true;311 int line_count = 0;312 313 String root_doc_id = null;314 Vector<Double> rankVector = new Vector<Double>();315 Vector<Integer> offsetVector = new 
Vector<Integer>();316 317 // Example output318 // D8 0.00105175319 // 1.69786e-16 392 392320 // 0.00113568 392 673321 // 0.00127239 392 910322 // 0.00139736 392 481323 // 0.00145331 392 303324 // D2 0.00429758325 // 0.00403335 392 865326 // 0.00411288 392 458327 // 0.00442461 392 866328 // 0.00444272 392 864329 // 0.00447434 392 424330 // ...331 332 String line;333 while ((line = wbr.readLine()) != null) {334 String[] tokens = line.split("\\s+");335 line_count++;336 337 if (tokens.length==2) {338 // processing a top-level doc line339 340 if (line_count>1) {341 // struck new top-level entry => store vector vals for previous block342 343 first_entry = addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);344 // and now reset vectors to empty to be ready for next chain of values345 rankVector = new Vector<Double>();346 offsetVector = new Vector<Integer>();347 }348 349 root_doc_id = tokens[0];350 }351 else {352 // should be 3 items353 double euclidean_dist = Double.parseDouble(tokens[0]);354 int src_frame = Integer.parseInt(tokens[1]);355 int target_frame = Integer.parseInt(tokens[2]);356 357 // ****358 359 // enforce 1.0 as upper limit due to rounding errors360 // in audioDB distance calculations361 double rank = Math.min(1.0 - euclidean_dist,1.0);362 363 if ((line_count==2) && (src_frame==target_frame)) {364 // Found match with self365 continue;366 }367 368 rankVector.add(rank);369 offsetVector.add(target_frame);370 }371 372 }373 374 addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);375 376 wbr.close();377 378 // sort query_results_ on 'rank' field379 // note: compareTo() method impelemented to sort into descending order380 381 Collections.sort(query_results_);382 383 384 }385 catch (Exception e) {386 logger.error("Failed to execute the following command: " + String.join(" ", cmd_array));387 e.printStackTrace();388 }389 390 }391 391 392 392
Note: See TracChangeset for help on using the changeset viewer.