source: gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java@ 36859

Last change on this file since 36859 was 36859, checked in by davidb, 19 months ago

Coding developments that mean param passed arousal and valence values not used; query_resutls_ capped to max_docs_

File size: 7.7 KB
Line 
/*
 *    GS2WekaDBSearch.java
 *    Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29// java
30import java.util.Vector;
31import java.util.ArrayList;
32import java.util.HashMap;
33import java.util.Map;
34import java.util.Set;
35import java.util.Iterator;
36import java.io.File;
37
38import org.apache.log4j.*;
39
// To fit in with the class hierarchy that has been developed to date
// around music content-based retrieval, the class name shifts from
// the external-facing 'Recommender' suffix to using 'Query' and/or
// 'Search'.
44
45public class GS2WekaDBSearch extends AbstractGS2AudioSearch {
46
47 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2WekaDBSearch.class.getName ());
48
49 protected static final String OFFSET_PARAM = "offset";
50 protected static final String LENGTH_PARAM = "length";
51 protected static final String RADIUS_PARAM = "radius";
52 protected static final String MAXDOCS_PARAM = "maxDocs";
53 protected static final String AROUSAL_PARAM = "arousal";
54 protected static final String VALENCE_PARAM = "valence";
55
56 //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model";
57 //protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser";
58
59 protected static final String WEKA_DB_DEFAULT_DIRECTORY = "wekaDB";
60 protected static final String KNN_FEATURES_FILENAME = "av-features.csv";
61
62
63 protected WekaDBWrapper wekadb_src = null;
64
65 public GS2WekaDBSearch() {
66
67 if(this.wekadb_src == null) {
68 logger.info("Initializing WekaDBWrapper");
69 this.wekadb_src = new WekaDBWrapper();
70 }
71 }
72
73 /** do the actual query */
74 protected Element processAudioQuery (Element request) {
75
76 // As the MG version needs to be java-synchronized (this inspiration for this class)
77 // And since it is not known how concurrent (thread-safe) Weka can be ...
78 // => Play it safe for now and restrict access to 'wekadb_src' using synchronized also
79 synchronized(this.wekadb_src) {
80 // Create a new (empty) result message ('doc' is in ServiceRack.java)
81 Document result_doc = XMLConverter.newDOM();
82 Element result = result_doc.createElement (GSXML.RESPONSE_ELEM);
83
84 // Rather than QUERY_SERVICE use "TextQuery"
85 // => makes the result looks the same as a text query
86 result.setAttribute (GSXML.FROM_ATT, "TextQuery");
87 result.setAttribute (GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
88
89 // Get the parameters of the request
90 Element param_list = (Element) GSXML.getChildByTagName (request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
91 if (param_list == null) {
92 logger.error ("Weka AudioQuery request had no paramList.");
93 return result; // Return the empty result
94 }
95
96 // Process the request parameters
97 HashMap params = GSXML.extractParams (param_list, false);
98
99 // Make sure a query has been specified
100 String query = (String) params.get (QUERY_PARAM);
101 if (query == null || query.equals ("")) {
102 return result; // Return the empty result
103 }
104
105 // If an index hasn't been specified, use the default
106 String index = (String) params.get (INDEX_PARAM);
107 if (index == null) {
108 index = WEKA_DB_DEFAULT_DIRECTORY;
109 }
110
111 // The location of the Weka db index
112 String toplevel_index_dir = GSFile.collectionIndexDir (this.site_home, this.cluster_name);
113 String weka_db_index_dir = toplevel_index_dir + File.separatorChar + index;
114 String assoc_index_dir = toplevel_index_dir + File.separatorChar + "assoc"; // ****
115
116 // set the Weka DB query parameters to the values the user has specified
117 setStandardQueryParams (params); // ****
118
119 this.wekadb_src.runQuery(weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query);
120 Vector docs = this.wekadb_src.getQueryResult();
121
122 if (docs.isEmpty()) {
123 // something has gone wrong
124 GSXML.addError (result, "Couldn't query the Weka DB", GSXML.ERROR_TYPE_SYSTEM);
125 return result;
126 }
127 long totalDocs = docs.size();
128
129 // Get the docnums out, and convert to HASH ids
130 if (docs.size () == 0) {
131 logger.error ("No results found...\n");
132 }
133
134 // Create a metadata list to store information about the query results
135 Element metadata_list = result_doc.createElement (GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
136 result.appendChild (metadata_list);
137
138 // Add a metadata element specifying the number of matching documents
139 // because the total number is just the number returned, use numDocsReturned, not numDocsMatched
140 GSXML.addMetadata (metadata_list, "numDocsReturned", ""+totalDocs);
141 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
142 GSXML.addMetadata (metadata_list, "query", query);
143
144 if (docs.size () > 0) {
145 // Create a document list to store the matching documents, and add them
146 Element document_list = result_doc.createElement (GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
147 result.appendChild (document_list);
148 for (int d = 0; d < docs.size (); d++) {
149 WekaDBDocInfo wdb_doc = (WekaDBDocInfo) docs.elementAt(d);
150
151 String doc_id = wdb_doc.getDocID();
152 double rank = wdb_doc.getTopRank();
153 String offsets = wdb_doc.getOffsetList();
154
155 Element doc_node = createDocNode (result_doc, doc_id, Double.toString (rank));
156 doc_node.setAttribute("frameOffset", offsets);
157
158 document_list.appendChild (doc_node);
159 }
160 }
161
162 // Create an empty term list as a place holder for the term information
163 Element term_list = result_doc.createElement (GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
164 result.appendChild (term_list);
165
166 return result;
167 }//end of synchronized
168 }
169
170 // should probably use a list rather than map
171 protected boolean setStandardQueryParams(HashMap params)
172 {
173 Set entries = params.entrySet();
174 Iterator i = entries.iterator();
175 while (i.hasNext()) {
176 Map.Entry m = (Map.Entry)i.next();
177 String name = (String)m.getKey();
178 String value = (String)m.getValue();
179
180 if (name.equals(OFFSET_PARAM)) {
181 int offset = Integer.parseInt(value);
182 this.wekadb_src.setOffset(offset);
183 }
184 else if (name.equals(LENGTH_PARAM)) {
185 int length = Integer.parseInt(value);
186 this.wekadb_src.setLength(length);
187 }
188 else if (name.equals(RADIUS_PARAM)) {
189 double radius = Double.parseDouble(value);
190 this.wekadb_src.setRadius(radius);
191 }
192 else if (name.equals(MAXDOCS_PARAM)) {
193 int docs = Integer.parseInt(value);
194 this.wekadb_src.setMaxDocs(docs);
195 }
196 else if (name.equals(AROUSAL_PARAM)) {
197 double arousal = Double.parseDouble(value);
198 this.wekadb_src.setArousal(arousal);
199 }
200 else if (name.equals(VALENCE_PARAM)) {
201 double valence = Double.parseDouble(value);
202 this.wekadb_src.setValence(valence);
203 } // ignore any others
204 }
205 return true;
206 }
207
208
209}
210
211
Note: See TracBrowser for help on using the repository browser.