Changeset 36859
- Timestamp:
- 2022-10-22T15:41:14+13:00 (18 months ago)
- Location:
- gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java
r36857 r36859 51 51 protected static final String RADIUS_PARAM = "radius"; 52 52 protected static final String MAXDOCS_PARAM = "maxDocs"; 53 protected static final String AROUSAL_PARAM = "arousal"; 54 protected static final String VALENCE_PARAM = "valence"; 53 55 54 56 //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model"; … … 115 117 setStandardQueryParams (params); // **** 116 118 117 this.wekadb_src.runQuery 118 Vector docs = this.wekadb_src.getQueryResult 119 this.wekadb_src.runQuery(weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query); 120 Vector docs = this.wekadb_src.getQueryResult(); 119 121 120 122 if (docs.isEmpty()) { … … 145 147 result.appendChild (document_list); 146 148 for (int d = 0; d < docs.size (); d++) { 147 WekaDBDocInfo adb_doc = (WekaDBDocInfo) docs.elementAt(d);148 149 String doc_id = adb_doc.getDocID();150 double rank = adb_doc.getTopRank();151 String offsets = adb_doc.getOffsetList();149 WekaDBDocInfo wdb_doc = (WekaDBDocInfo) docs.elementAt(d); 150 151 String doc_id = wdb_doc.getDocID(); 152 double rank = wdb_doc.getTopRank(); 153 String offsets = wdb_doc.getOffsetList(); 152 154 153 155 Element doc_node = createDocNode (result_doc, doc_id, Double.toString (rank)); … … 191 193 int docs = Integer.parseInt(value); 192 194 this.wekadb_src.setMaxDocs(docs); 195 } 196 else if (name.equals(AROUSAL_PARAM)) { 197 double arousal = Double.parseDouble(value); 198 this.wekadb_src.setArousal(arousal); 199 } 200 else if (name.equals(VALENCE_PARAM)) { 201 double valence = Double.parseDouble(value); 202 this.wekadb_src.setValence(valence); 193 203 } // ignore any others 194 204 } -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java
r36857 r36859 45 45 46 46 protected int offset_ = 100; 47 protected int length_ = 20; 47 protected int length_ = 20; // **** Unused 48 48 49 49 // Approximate matching not yet utilized 50 protected double radius_; 50 protected double radius_; // **** Unused 51 51 52 52 protected int max_docs_; 53 53 54 protected double arousal_; 55 protected double valence_; 56 54 57 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.util.WekaDBWrapper.class.getName ()); 55 58 … … 76 79 radius_ = radius; 77 80 } 78 81 79 82 public void setMaxDocs(int max_docs) { 80 83 max_docs_ = max_docs; 81 84 } 82 85 86 public void setArousal(double arousal) { 87 arousal_ = arousal; 88 } 89 public void setValence(double valence) { 90 valence_ = valence; 91 } 92 83 93 /** returns a string with all the current query param settings */ 84 94 // the following was in MG version, do we need this in WekaDB version? // **** … … 142 152 WekaFindInstanceKNN.init(full_knn_model_filename); 143 153 144 //Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours("ds_22716_5743",6,-0.549489,-0.118439,3); 145 String doc_id = query_string; 146 147 int segment = 6; 148 //String segment_str = Integer.toString(segment); 149 String doc_id_segment = doc_id +"-" + segment; 150 151 double arousal_val = -0.549489; 152 double valence_val = -0.118439; 153 int k_nearest_num = 3; 154 155 System.err.println("doc_id_segment = " + doc_id_segment); 156 157 158 Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours(doc_id_segment,arousal_val,valence_val,k_nearest_num); 159 160 161 query_result_ = new Vector(); 162 163 /* 164 WekaDBDocInfo wekaDB_doc_info1 = new WekaDBDocInfo("ds_20415_2337",0.9,4); 165 query_result_.add(wekaDB_doc_info1); 166 167 WekaDBDocInfo wekaDB_doc_info2 = new WekaDBDocInfo("ds_51017_15513",0.87,1); 168 query_result_.add(wekaDB_doc_info2); 169 170 WekaDBDocInfo wekaDB_doc_info3 = new WekaDBDocInfo("ds_20415_2337",0.82,6); 171 query_result_.add(wekaDB_doc_info3); 172 */ 
173 174 int clamped_k_nearest_num = Math.min(k_nearest_num,nearest_instances.size()); 175 176 for (int i=0; i<clamped_k_nearest_num; i++) { 177 Instance instance = nearest_instances.instance(i); 154 String doc_id = query_string; 155 int segment = offset_; 156 157 String query_doc_id_segment = doc_id + "-" + segment; 158 159 double query_arousal_val = arousal_; 160 double query_valence_val = valence_; 161 162 int k_nearest_num = max_docs_; 163 int expanded_k_nearest_num = max_docs_ * 5; // * internally get more matches, then sift through to arrive at the best 'max_docs_' 164 165 Pattern doc_seg_re = Pattern.compile("^(\\w+)-(\\d+)$"); 166 //Matcher query_doc_seg_match = doc_seq_re.matcher(query_doc_id_segment); 167 168 Instances nearest_instances 169 = WekaFindInstanceKNN.kNearestNeighbours(query_doc_id_segment,query_arousal_val,query_valence_val,k_nearest_num); 170 171 172 Vector expanded_query_result = new Vector(); 173 174 int nearest_instances_len = nearest_instances.size(); 175 176 int clamped_expanded_k_nearest_num = Math.min(expanded_k_nearest_num,nearest_instances_len); 177 178 double pos_penalty = 0.1; 179 180 for (int ei=0; ei<clamped_expanded_k_nearest_num; ei++) { 181 Instance instance = nearest_instances.instance(ei); 178 182 logger.info("\tProcessing returned instance: " + instance); 179 183 180 //Attribute doc_id_segment_att = instance.attribute(0);181 //String matching_doc_id_segment = instance.attribute(0).value(0);182 184 String matching_doc_id_segment = instance.stringValue(0); 183 //double arousal_val = instance.attribute(1); 184 //double valence_val = instance.attribute(2); 185 186 187 Pattern p = Pattern.compile("^(\\w+)-(\\d+)$"); 188 Matcher m = p.matcher(matching_doc_id_segment); 185 186 187 //Pattern p = Pattern.compile("^(\\w+)-(\\d+)$"); 188 Matcher m = doc_seg_re.matcher(matching_doc_id_segment); 189 189 if (m.matches()) { 190 190 191 191 String matching_doc_id = m.group(1); 192 192 int matching_segment_offset = 
Integer.parseInt(m.group(2)); 193 194 double matching_rank = 0.9; 193 194 if (matching_doc_id.equals(doc_id)) { 195 continue; 196 } 197 198 double matching_arousal_val = instance.value(1); 199 double matching_valence_val = instance.value(2); 200 201 double matching_diff = (Math.abs(query_arousal_val - matching_arousal_val) 202 + Math.abs(query_valence_val - matching_valence_val))/4.0; 203 double matching_rank = 1.0 - matching_diff - (pos_penalty * (double)ei); 195 204 196 205 logger.info("\tAdding in: matching_doc_id = " + matching_doc_id); 197 206 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset); 198 query_result_.add(wekaDB_doc_info);207 expanded_query_result.add(wekaDB_doc_info); 199 208 } 200 209 else { 201 logger.error("Returned AV k-nearest neighbour match '"+doc_id_segment+"' could not be parsed as <doc-id>-<segment>" ); 210 logger.error("Returned AV k-nearest neighbour match '"+matching_doc_id_segment+"' could not be parsed as <doc-id>-<segment>" ); 211 } 212 } 213 214 query_result_ = new Vector(); 215 216 int i = 0; 217 while (i < k_nearest_num) { 218 if (i >= expanded_query_result.size()) { 219 break; 202 220 } 203 } 204 205 /* 206 int num_matches_within_track = 6; 207 208 209 first_entry = addQueryResult(first_entry,root_doc_id,rankVector,offsetVector); 210 // and now reset vectors to empty to be ready for next chain of values 211 rankVector = new Vector<Double>(); 212 offsetVector = new Vector<Integer>(); 213 214 215 rankVector.add(rank); 216 offsetVector.add(target_frame); 217 218 addQueryResult(first_entry,root_doc_id,rankVector,offsetVector); 219 */ 220 221 222 query_result_.add(expanded_query_result.get(i)); 223 i++; 224 } 225 226 Collections.sort(query_result_); 221 227 } 222 228 -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaFindInstanceKNN.java
r36853 r36859 93 93 public static Instances kNearestNeighbours(String doc_id_segment, double arousal_val,double valence_val, int k_nearest_num) 94 94 { 95 System.err.println("**** wekaFindInstnaceKNN::kNearestNeighbours() called with:"); 96 System.err.print( "**** doc_id_segment = " + doc_id_segment); 97 System.err.print( " arousal_val = " + arousal_val); 98 System.err.print( " valence_val = " + valence_val); 99 System.err.println(" k_nearest_num = " + k_nearest_num); 95 100 96 101 Instance sample_instance = new DenseInstance(3); … … 103 108 //sample_instance.setValue(2, -0.118439); 104 109 105 //String segment_str = Integer.toString(segment);106 //sample_instance.setValue(0, doc_id +"-" + segment_str);107 110 108 111 sample_instance.setValue(0, doc_id_segment); 109 112 110 113 sample_instance.setValue(1, arousal_val); 111 114 sample_instance.setValue(2, valence_val);
Note:
See TracChangeset for help on using the changeset viewer.