source: gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java@ 36857

Last change on this file since 36857 was 36857, checked in by davidb, 19 months ago

Service now showing some basic functionality to retrieve songs

File size: 7.3 KB
Line 
1/*
2 * GS2WekaDBSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29// java
30import java.util.Vector;
31import java.util.ArrayList;
32import java.util.HashMap;
33import java.util.Map;
34import java.util.Set;
35import java.util.Iterator;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40// To fit in with the class hierarchy that has been developed to date
41// around music content based retrieval, the class name shifts from
42// the external facing 'Recommender' suffix to using 'Query' and/or
43// 'Search'
44
45public class GS2WekaDBSearch extends AbstractGS2AudioSearch {
46
47 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2WekaDBSearch.class.getName ());
48
49 protected static final String OFFSET_PARAM = "offset";
50 protected static final String LENGTH_PARAM = "length";
51 protected static final String RADIUS_PARAM = "radius";
52 protected static final String MAXDOCS_PARAM = "maxDocs";
53
54 //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model";
55 //protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser";
56
57 protected static final String WEKA_DB_DEFAULT_DIRECTORY = "wekaDB";
58 protected static final String KNN_FEATURES_FILENAME = "av-features.csv";
59
60
61 protected WekaDBWrapper wekadb_src = null;
62
63 public GS2WekaDBSearch() {
64
65 if(this.wekadb_src == null) {
66 logger.info("Initializing WekaDBWrapper");
67 this.wekadb_src = new WekaDBWrapper();
68 }
69 }
70
71 /** do the actual query */
72 protected Element processAudioQuery (Element request) {
73
74 // As the MG version needs to be java-synchronized (this inspiration for this class)
75 // And since it is not known how concurrent (thread-safe) Weka can be ...
76 // => Play it safe for now and restrict access to 'wekadb_src' using synchronized also
77 synchronized(this.wekadb_src) {
78 // Create a new (empty) result message ('doc' is in ServiceRack.java)
79 Document result_doc = XMLConverter.newDOM();
80 Element result = result_doc.createElement (GSXML.RESPONSE_ELEM);
81
82 // Rather than QUERY_SERVICE use "TextQuery"
83 // => makes the result looks the same as a text query
84 result.setAttribute (GSXML.FROM_ATT, "TextQuery");
85 result.setAttribute (GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
86
87 // Get the parameters of the request
88 Element param_list = (Element) GSXML.getChildByTagName (request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
89 if (param_list == null) {
90 logger.error ("Weka AudioQuery request had no paramList.");
91 return result; // Return the empty result
92 }
93
94 // Process the request parameters
95 HashMap params = GSXML.extractParams (param_list, false);
96
97 // Make sure a query has been specified
98 String query = (String) params.get (QUERY_PARAM);
99 if (query == null || query.equals ("")) {
100 return result; // Return the empty result
101 }
102
103 // If an index hasn't been specified, use the default
104 String index = (String) params.get (INDEX_PARAM);
105 if (index == null) {
106 index = WEKA_DB_DEFAULT_DIRECTORY;
107 }
108
109 // The location of the Weka db index
110 String toplevel_index_dir = GSFile.collectionIndexDir (this.site_home, this.cluster_name);
111 String weka_db_index_dir = toplevel_index_dir + File.separatorChar + index;
112 String assoc_index_dir = toplevel_index_dir + File.separatorChar + "assoc"; // ****
113
114 // set the Weka DB query parameters to the values the user has specified
115 setStandardQueryParams (params); // ****
116
117 this.wekadb_src.runQuery (weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query);
118 Vector docs = this.wekadb_src.getQueryResult ();
119
120 if (docs.isEmpty()) {
121 // something has gone wrong
122 GSXML.addError (result, "Couldn't query the Weka DB", GSXML.ERROR_TYPE_SYSTEM);
123 return result;
124 }
125 long totalDocs = docs.size();
126
127 // Get the docnums out, and convert to HASH ids
128 if (docs.size () == 0) {
129 logger.error ("No results found...\n");
130 }
131
132 // Create a metadata list to store information about the query results
133 Element metadata_list = result_doc.createElement (GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
134 result.appendChild (metadata_list);
135
136 // Add a metadata element specifying the number of matching documents
137 // because the total number is just the number returned, use numDocsReturned, not numDocsMatched
138 GSXML.addMetadata (metadata_list, "numDocsReturned", ""+totalDocs);
139 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
140 GSXML.addMetadata (metadata_list, "query", query);
141
142 if (docs.size () > 0) {
143 // Create a document list to store the matching documents, and add them
144 Element document_list = result_doc.createElement (GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
145 result.appendChild (document_list);
146 for (int d = 0; d < docs.size (); d++) {
147 WekaDBDocInfo adb_doc = (WekaDBDocInfo) docs.elementAt(d);
148
149 String doc_id = adb_doc.getDocID();
150 double rank = adb_doc.getTopRank();
151 String offsets = adb_doc.getOffsetList();
152
153 Element doc_node = createDocNode (result_doc, doc_id, Double.toString (rank));
154 doc_node.setAttribute("frameOffset", offsets);
155
156 document_list.appendChild (doc_node);
157 }
158 }
159
160 // Create an empty term list as a place holder for the term information
161 Element term_list = result_doc.createElement (GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
162 result.appendChild (term_list);
163
164 return result;
165 }//end of synchronized
166 }
167
168 // should probably use a list rather than map
169 protected boolean setStandardQueryParams(HashMap params)
170 {
171 Set entries = params.entrySet();
172 Iterator i = entries.iterator();
173 while (i.hasNext()) {
174 Map.Entry m = (Map.Entry)i.next();
175 String name = (String)m.getKey();
176 String value = (String)m.getValue();
177
178 if (name.equals(OFFSET_PARAM)) {
179 int offset = Integer.parseInt(value);
180 this.wekadb_src.setOffset(offset);
181 }
182 else if (name.equals(LENGTH_PARAM)) {
183 int length = Integer.parseInt(value);
184 this.wekadb_src.setLength(length);
185 }
186 else if (name.equals(RADIUS_PARAM)) {
187 double radius = Double.parseDouble(value);
188 this.wekadb_src.setRadius(radius);
189 }
190 else if (name.equals(MAXDOCS_PARAM)) {
191 int docs = Integer.parseInt(value);
192 this.wekadb_src.setMaxDocs(docs);
193 } // ignore any others
194 }
195 return true;
196 }
197
198
199}
200
201
Note: See TracBrowser for help on using the repository browser.