Changeset 36857
- Timestamp:
- 2022-10-22T10:09:15+13:00 (18 months ago)
- Location:
- gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java
r36856 r36857 1 1 /* 2 * GS2Weka AVRecommender.java2 * GS2WekaDBSearch.java 3 3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org 4 4 * … … 43 43 // 'Search' 44 44 45 public class GS2Weka AVRecommenderextends AbstractGS2AudioSearch {46 47 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2Weka AVRecommender.class.getName ());45 public class GS2WekaDBSearch extends AbstractGS2AudioSearch { 46 47 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2WekaDBSearch.class.getName ()); 48 48 49 49 protected static final String OFFSET_PARAM = "offset"; … … 52 52 protected static final String MAXDOCS_PARAM = "maxDocs"; 53 53 54 protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model"; 55 protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser"; 54 //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model"; 55 //protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser"; 56 57 protected static final String WEKA_DB_DEFAULT_DIRECTORY = "wekaDB"; 58 protected static final String KNN_FEATURES_FILENAME = "av-features.csv"; 59 56 60 57 61 protected WekaDBWrapper wekadb_src = null; 58 62 59 public GS2Weka AVRecommender() {60 63 public GS2WekaDBSearch() { 64 61 65 if(this.wekadb_src == null) { 66 logger.info("Initializing WekaDBWrapper"); 62 67 this.wekadb_src = new WekaDBWrapper(); 63 68 } … … 66 71 /** do the actual query */ 67 72 protected Element processAudioQuery (Element request) { 68 // MG needs to be synchronized (this inspiration for this class) 69 // Since it is not known how concurrent Weka can be, play it safe for70 // now and also require synchronization71 73 74 // As the MG version needs to be java-synchronized (this inspiration for this class) 75 // And since it is not known how concurrent (thread-safe) Weka can be ... 
76 // => Play it safe for now and restrict access to 'wekadb_src' using synchronized also 72 77 synchronized(this.wekadb_src) { 73 78 // Create a new (empty) result message ('doc' is in ServiceRack.java) … … 99 104 String index = (String) params.get (INDEX_PARAM); 100 105 if (index == null) { 101 index = WEKA_ MODEL_DEFAULT_DIRECTORY;102 } 103 104 // The location of the Weka modelindex106 index = WEKA_DB_DEFAULT_DIRECTORY; 107 } 108 109 // The location of the Weka db index 105 110 String toplevel_index_dir = GSFile.collectionIndexDir (this.site_home, this.cluster_name); 106 String weka_ model_index_dir = toplevel_index_dir + File.separatorChar + index;111 String weka_db_index_dir = toplevel_index_dir + File.separatorChar + index; 107 112 String assoc_index_dir = toplevel_index_dir + File.separatorChar + "assoc"; // **** 108 113 109 // set the Weka modelquery parameters to the values the user has specified114 // set the Weka DB query parameters to the values the user has specified 110 115 setStandardQueryParams (params); // **** 111 116 112 this.wekadb_src.runQuery (weka_ model_index_dir, KNN_MODEL_FILENAME, assoc_index_dir, query);117 this.wekadb_src.runQuery (weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query); 113 118 Vector docs = this.wekadb_src.getQueryResult (); 114 119 115 120 if (docs.isEmpty()) { 116 121 // something has gone wrong 117 GSXML.addError (result, "Couldn't query the Weka model", GSXML.ERROR_TYPE_SYSTEM);122 GSXML.addError (result, "Couldn't query the Weka DB", GSXML.ERROR_TYPE_SYSTEM); 118 123 return result; 119 124 } -
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java
r35177 r36857 22 22 import java.util.Vector; 23 23 import java.util.Collections; 24 import java.util.regex.Pattern; 25 import java.util.regex.Matcher; 26 27 import weka.core.Attribute; 28 import weka.core.Instance; 29 import weka.core.Instances; 24 30 25 31 import org.apache.log4j.*; 32 33 import org.greenstone.gsdl3.util.WekaFindInstanceKNN; 26 34 27 35 /** Java wrapper class for access to the Weka … … 107 115 * base_dir must end with a file separator (OS dependent) 108 116 */ 117 118 109 119 public void runQuery(String wekaDB_index_dir, String knn_model_file, 120 String assoc_index_dir, String query_string) { 121 122 // combine index_dir with audiodb filename 123 124 String full_knn_model_filename = wekaDB_index_dir + File.separatorChar + knn_model_file; 125 126 //String full_chr12_filename = assoc_index_dir + File.separatorChar 127 // + query_string + File.separatorChar + "doc.chr12"; 128 129 System.err.println("**** full knn model filename = " + full_knn_model_filename); 130 131 // Example returned result from Weka KNN 132 // => first line is the input instance ('filename+segment',Arousal,Valence) 133 // following (indented lines) nearest neighbour matches in same format 134 // 135 // ds_22716_5743-6,-0.549489,-0.118439 136 // ds_22716_5743-6,-0.549489,-0.118439 137 // ds_31008_6550-30,-0.549489,-0.118439 138 // ds_72651_26831-6,-0.549489,-0.118439 139 // ds_26196_9214-18,-0.549489,-0.118439 140 141 142 WekaFindInstanceKNN.init(full_knn_model_filename); 143 144 //Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours("ds_22716_5743",6,-0.549489,-0.118439,3); 145 String doc_id = query_string; 146 147 int segment = 6; 148 //String segment_str = Integer.toString(segment); 149 String doc_id_segment = doc_id +"-" + segment; 150 151 double arousal_val = -0.549489; 152 double valence_val = -0.118439; 153 int k_nearest_num = 3; 154 155 System.err.println("doc_id_segment = " + doc_id_segment); 156 157 158 Instances nearest_instances = 
WekaFindInstanceKNN.kNearestNeighbours(doc_id_segment,arousal_val,valence_val,k_nearest_num); 159 160 161 query_result_ = new Vector(); 162 163 /* 164 WekaDBDocInfo wekaDB_doc_info1 = new WekaDBDocInfo("ds_20415_2337",0.9,4); 165 query_result_.add(wekaDB_doc_info1); 166 167 WekaDBDocInfo wekaDB_doc_info2 = new WekaDBDocInfo("ds_51017_15513",0.87,1); 168 query_result_.add(wekaDB_doc_info2); 169 170 WekaDBDocInfo wekaDB_doc_info3 = new WekaDBDocInfo("ds_20415_2337",0.82,6); 171 query_result_.add(wekaDB_doc_info3); 172 */ 173 174 int clamped_k_nearest_num = Math.min(k_nearest_num,nearest_instances.size()); 175 176 for (int i=0; i<clamped_k_nearest_num; i++) { 177 Instance instance = nearest_instances.instance(i); 178 logger.info("\tProcessing returned instance: " + instance); 179 180 //Attribute doc_id_segment_att = instance.attribute(0); 181 //String matching_doc_id_segment = instance.attribute(0).value(0); 182 String matching_doc_id_segment = instance.stringValue(0); 183 //double arousal_val = instance.attribute(1); 184 //double valence_val = instance.attribute(2); 185 186 187 Pattern p = Pattern.compile("^(\\w+)-(\\d+)$"); 188 Matcher m = p.matcher(matching_doc_id_segment); 189 if (m.matches()) { 190 191 String matching_doc_id = m.group(1); 192 int matching_segment_offset = Integer.parseInt(m.group(2)); 193 194 double matching_rank = 0.9; 195 196 logger.info("\tAdding in: matching_doc_id = " + matching_doc_id); 197 WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset); 198 query_result_.add(wekaDB_doc_info); 199 } 200 else { 201 logger.error("Returned AV k-nearest neighbour match '"+doc_id_segment+"' could not be parsed as <doc-id>-<segment>" ); 202 } 203 } 204 205 /* 206 int num_matches_within_track = 6; 207 208 209 first_entry = addQueryResult(first_entry,root_doc_id,rankVector,offsetVector); 210 // and now reset vectors to empty to be ready for next chain of values 211 rankVector = new Vector<Double>(); 212 
offsetVector = new Vector<Integer>(); 213 214 215 rankVector.add(rank); 216 offsetVector.add(target_frame); 217 218 addQueryResult(first_entry,root_doc_id,rankVector,offsetVector); 219 */ 220 221 } 222 223 public void runQueryOLD(String wekaDB_index_dir, String knn_model_file, 110 224 String assoc_index_dir, String query_string) { 111 225 … … 120 234 // **** 121 235 String [] cmd_array = new String[] { 122 "java -jarweka.jar",236 "java", "-jar", "weka.jar", 123 237 "-d", full_knn_model_filename, 124 238 "-Q", "nsequence",
Note:
See TracChangeset
for help on using the changeset viewer.