source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS3Search.java@ 6517

Last change on this file since 6517 was 6517, checked in by kjdon, 20 years ago

now classifiers work, and searching works, and hierarchical docs work

  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1/*
2 * GS3Search.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Element;
27import org.w3c.dom.Document;
28import org.w3c.dom.NodeList;
29
30
31/**
32 *
33 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
34 * @version $Revision: 6517 $
35 */
36
37public abstract class GS3Search
38 extends ServiceRack {
39
40 // the services on offer
41 // these strings must match what is found in the properties file
42 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
43
44 // Parameters used
45 protected static final String INDEX_PARAM = "index";
46 protected static final String CASE_PARAM = "case";
47 protected static final String STEM_PARAM = "stem";
48 protected static final String MATCH_PARAM = "matchMode";
49 protected static final String MATCH_PARAM_ALL = "all";
50 protected static final String MATCH_PARAM_SOME = "some";
51 protected static final String MAXDOCS_PARAM = "maxDocs";
52 protected static final String BOOLEAN_PARAM_ON = "1";
53 protected static final String BOOLEAN_PARAM_OFF = "0";
54 protected static final String QUERY_PARAM = "query";
55
56 protected static final String EQUIV_TERM_ELEM = "equivTerm";
57
58 protected static final String STEM_ATT = "stem";
59 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
60 protected static final String FREQ_ATT = "freq";
61
62 // Elements used in the config file that are specific to this class
63 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
64 protected static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
65 protected static final String INDEX_ELEM = "index";
66 protected static final String LEVEL_ELEM = "level";
67
68 //protected GDBMWrapper gdbm_src = null;
69 protected SQLQuery database = null;
70
71 protected Element config_info = null;
72
73 /** the default index */
74 protected String default_index = null;
75
76
77 /** constructor */
78 public GS3Search()
79 {
80 this.database = new SQLQuery();
81 //this.gdbm_src = new GDBMWrapper();
82 }
83
84
85 /** configure this service */
86 public boolean configure(Element info, Element extra_info)
87 {
88 System.out.println("Configuring GS3Search...");
89 addExtraQueryInfo(info, extra_info);
90 this.config_info = info;
91
92 // Get the default index out of <defaultIndex> (buildConfig.xml)
93 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
94 if (def != null) {
95 this.default_index = def.getAttribute(GSXML.NAME_ATT);
96 }
97 if (this.default_index == null || this.default_index.equals("")) {
98 System.err.println("Error: default index not specified!");
99 return false;
100 }
101
102 // these entries should reflect the build config file - some services may not be available depending on how the collection was built.
103 // set up short_service_info_ - for now just has id and type. the name (lang dependent) will be added in if the list is requested.
104 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
105 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
106 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
107 this.short_service_info.appendChild(tq_service);
108
109
110 // Open GDBM database for querying
111 //String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
112 //if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
113 // System.err.println("Error: Could not open GDBM database!");
114 // return false;
115 //}
116 // open the database for querying
117 if (!database.setDatabase(this.cluster_name)) {
118 System.err.println("GS3Search Error: Could not open SQL database!");
119 return false;
120 }
121 // add some format info to service map if there is any
122 Element format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
123 if (format != null) {
124 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true));
125 }
126
127 return true;
128 }
129
130 protected Element getServiceDescription(String service, String lang, String subset) {
131
132 if (!service.equals(TEXT_QUERY_SERVICE)) {
133 return null;
134 }
135 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
136 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
137 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
138 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM)) {
139 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE+".name", lang)));
140 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE+".submit", lang)));
141 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE+".description", lang)));
142 }
143 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
144 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
145 createTextQueryParamList(param_list, lang);
146 tq_service.appendChild(param_list);
147 }
148 return tq_service;
149
150 }
151
152 protected boolean addExtraQueryInfo(Element info, Element extra_info){
153
154 if (extra_info == null) {
155 return false;
156 }
157
158 Document owner = info.getOwnerDocument();
159 // so far we have index specific display elements, and global format elements
160 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
161 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
162
163 for (int i=0; i<indexes.getLength();i++) {
164 Element ind = (Element)indexes.item(i);
165 String name = ind.getAttribute(GSXML.NAME_ATT);
166 Element node_extra = GSXML.getNamedElement(config_search,
167 GSXML.INDEX_ELEM,
168 GSXML.NAME_ATT,
169 name);
170 if (node_extra == null) {
171 System.err.println("GS3Search: haven't found extra info for index named "+name);
172 continue;
173 }
174
175 // get the display elements if any - displayName
176 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
177 if (display_names !=null) {
178 for (int j=0; j<display_names.getLength(); j++) {
179 Element e = (Element)display_names.item(j);
180 ind.appendChild(owner.importNode(e, true));
181 }
182 }
183 } // for each index
184
185 // get the format element if any
186 Element format = (Element)GSXML.getChildByTagName(config_search,
187 GSXML.FORMAT_ELEM);
188 if (format!=null) { // append to info
189 info.appendChild(owner.importNode(format, true));
190 }
191 return true;
192
193
194 }
195/** creates a new param element and adds it to the param list */
196 protected void createParameter(String name, Element param_list,
197 /*boolean display,*/ String lang)
198 {
199 Element param=null;
200
201 if (name.equals(INDEX_PARAM)) {
202 // the index info - read from config file
203 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
204 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
205 int len = indexes.getLength();
206 // now add even if there is only one
207 String [] inds = new String[len];
208 String [] ind_names = new String[len];
209 for (int i=0; i<len; i++) {
210 Element index = (Element)indexes.item(i);
211 inds[i] = index.getAttribute(GSXML.NAME_ATT);
212 ind_names[i] = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
213
214 }
215 param = GSXML.createParameterDescription(this.doc, INDEX_PARAM, getTextString("param."+INDEX_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, this.default_index, inds, ind_names);
216
217 }
218 else if (name.equals(CASE_PARAM) || name.equals(STEM_PARAM)) {
219 String[] bool_ops = {"0", "1"};
220 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
221 param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, bool_ops, bool_texts);
222 }
223 else if (name.equals(MATCH_PARAM)) {
224 String[] vals = {MATCH_PARAM_ALL, MATCH_PARAM_SOME};
225 String[] val_texts = {getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_ALL, lang),getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_SOME, lang)};
226 param = GSXML.createParameterDescription(this.doc, MATCH_PARAM, getTextString("param."+MATCH_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, MATCH_PARAM_ALL, vals, val_texts);
227
228 }
229 else if (name.equals(MAXDOCS_PARAM)) {
230 param = GSXML.createParameterDescription(this.doc, MAXDOCS_PARAM, getTextString("param."+MAXDOCS_PARAM, lang), GSXML.PARAM_TYPE_INTEGER, "10", null, null);
231
232 }
233 else if (name.equals(QUERY_PARAM)) {
234 param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
235
236 }
237
238 // add the param to the list
239 if (param != null) {
240 param_list.appendChild(param);
241 }
242 }
243
244
245 /** this creates all the params and appends them to param_list.
246 * if display=true it creates the text strings version
247 * otherwise it creates the description version
248 */
249 protected abstract boolean createTextQueryParamList(Element param_list,
250 String lang);
251
252 /** Creates a new documentNode element containing ID, node type
253 * and docType*/
254 protected Element createDocumentNodeElement(String node_id)
255 {
256 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257 doc_node.setAttribute(GSXML.NODE_ID_ATT, node_id);
258
259 String top_id = OID.getTop(node_id);
260 boolean is_top = (top_id.equals(node_id) ? true : false);
261
262 doc_node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
263
264 if (GS3OID.isTop(node_id)) {
265 doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
266 } else if (database.documentHasChildren(node_id)){
267 doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
268 } else {
269 doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
270 }
271
272 return doc_node;
273 }
274// DBInfo info = this.gdbm_src.getInfo(node_id);
275// if (info == null) { // make it up - cant query the gdbm db
276// doc_node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
277// return doc_node;
278// }
279// String children = info.getInfo("contains");
280// boolean is_leaf = (children.equals("") ? true : false);
281
282// // check for simple doc types
283// if (is_top && is_leaf) { // a single section document
284// doc_node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
285// return doc_node;
286// }
287// // set teh node type att
288// if (is_top) {
289// doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
290// } else if (is_leaf) {
291// doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
292// } else {
293// doc_node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
294// }
295
296// if (!is_top) { // we need to look at the top info
297// info = this.gdbm_src.getInfo(top_id);
298// }
299
300// String childtype = info.getInfo("childtype");
301// if (childtype.equals("Paged")) {
302// doc_node.setAttribute(GSXML.DOC_TYPE_ATT, "paged");
303// } else {
304// doc_node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
305// }
306// return doc_node;
307// }
308
309 /** returns the document type of a node - if the node is a subnode, it returns teh type of teh top document node. for now, only does paged and hierarchical, but eventually will have more types */
310// protected String getDocType(String node_id) {
311
312// String doc_id = OID.getTop(node_id);
313// DBInfo info = this.gdbm_src.getInfo(doc_id);
314// String child_type = info.getInfo("childtype");
315// if (child_type.equals("Paged")) {
316// return "paged";
317// }
318// return "hierarchy";
319
320// }
321
322 /** Returns true if the OID specifies a leaf node, false otherwise
323 Note: this makes a request to the GDBM database so it may not be cheap */
324// protected boolean isLeafNode(String oid)
325// {
326// DBInfo info = this.gdbm_src.getInfo(oid);
327// String children = info.getInfo("contains");
328// return (children.equals(""));
329// }
330
331
332 /** Process a text query - implemented by concrete subclasses */
333 protected abstract Element processTextQuery(Element request);
334}
335
Note: See TracBrowser for help on using the repository browser.