source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2AudioSearch.java@ 38154

Last change on this file since 38154 was 35179, checked in by davidb, 3 years ago

Now that an audio-content based recommender service (using Weka) is being added in for the MARS project, these two classes (originally developed for the audioDB extension) are being added into the main code base, as both the audioDB and MARS extensions need to inherit from them.

File size: 6.4 KB
Line 
/*
 * AbstractGS2AudioSearch.java
 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.OID;
23import org.greenstone.gsdl3.util.DBInfo;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.BasicDocumentDatabase;
26import org.greenstone.gsdl3.util.GSFile;
27
28// XML classes
29import org.w3c.dom.Document;
30import org.w3c.dom.Element;
31import org.w3c.dom.NodeList;
32
33// java
34import java.util.Vector;
35import java.util.ArrayList;
36import java.util.HashMap;
37import java.util.Map;
38import java.util.Set;
39import java.util.Iterator;
40import java.io.File;
41
42import org.apache.log4j.*;
43
44public abstract class AbstractGS2AudioSearch
45 extends AbstractAudioSearch
46{
47
48 protected static final String EQUIV_TERM_ELEM = "equivTerm";
49
50 protected static final String STEM_ATT = "stem";
51 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
52 protected static final String FREQ_ATT = "freq";
53
54 // Elements used in the config file that are specific to this class
55 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
56 protected static final String INDEX_STEM_ELEM = "indexStem";
57 protected static final String INDEX_ELEM = "index";
58 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
59 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
60 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
61 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
62
63
64 // Some indexing options
65 protected static final String STEMINDEX_OPTION = "stemIndexes";
66 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
67
68 /** the stem used for the index files */
69 protected String index_stem = null;
70
71 // stem indexes available
72 protected boolean does_case=true;
73 protected boolean does_stem=true;
74 protected boolean does_accent=false;
75
76 // maxnumeric -
77 protected int maxnumeric = 4;
78
79 BasicDocumentDatabase gs_doc_db = null;
80
81 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2AudioSearch.class.getName());
82
83
84 /** constructor */
85 public AbstractGS2AudioSearch()
86 {
87
88 }
89
90 public void cleanUp() {
91 super.cleanUp();
92 this.gs_doc_db.cleanUp();
93 }
94
95 /** configure this service */
96 public boolean configure(Element info, Element extra_info)
97 {
98 if (!super.configure(info,extra_info)) {
99 return false;
100 }
101
102 // find out what kind of database we have
103 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
104 String database_type = null;
105 if (database_type_elem != null) {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals("")) {
109 database_type = "gdbm"; // the default
110 }
111
112 // the index stem is either the collection name or is specified in the config file
113 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
114 if (index_stem_elem != null) {
115 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
116 }
117 if (this.index_stem == null || this.index_stem.equals("")) {
118 logger.warn("indexStem element not found, stem will default to collection name");
119 this.index_stem = this.cluster_name;
120 }
121
122 // replaces default AbstractSearch version with one tied to database
123 gs_doc_db = new BasicDocumentDatabase(database_type,this.site_home,
124 this.cluster_name,
125 this.index_stem);
126 if (!gs_doc_db.isValid()) {
127 logger.error("Failed to open Document Database.");
128 return false;
129 }
130 this.gs_doc = gs_doc_db;
131
132 // do we support any of the extended features?
133 does_chunking = true;
134
135 // Get the default index out of <defaultIndex> (buildConfig.xml)
136 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
137 if (def != null) {
138 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
139 } // otherwise will be "", and the first one will be the default
140
141
142 // Get index options ...
143 // ... but there are currently no index options supported for audio
144 // so nothing to do
145
146 // Similarly the following will not currently do anything, but leave
147 // in for the time we do start supporting different indexes
148 //
149 // *** Also, it is identical to that in AbstractGS2TextSearch, so
150 // consider putting into supporting routine and sharing
151 // (realted in 'info' object
152
153 // get display info from extra info
154 if (extra_info !=null) {
155 Document owner = info.getOwnerDocument();
156 // so far we have index specific display elements, and global format elements
157 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
158 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
159
160 for (int i=0; i<indexes.getLength();i++) {
161 Element ind = (Element)indexes.item(i);
162 String name = ind.getAttribute(GSXML.NAME_ATT);
163 Element node_extra = GSXML.getNamedElement(config_search,
164 GSXML.INDEX_ELEM,
165 GSXML.NAME_ATT,
166 name);
167 if (node_extra == null) {
168 logger.error("haven't found extra info for index named "+name);
169 continue;
170 }
171
172 // get the display elements if any - displayName
173 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
174 if (display_names !=null) {
175 for (int j=0; j<display_names.getLength(); j++) {
176 Element e = (Element)display_names.item(j);
177 ind.appendChild(owner.importNode(e, true));
178 }
179 }
180 } // for each index
181 }
182
183 return true;
184 }
185
186 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
187 {
188 logger.info("Trivial index support for audio for now. Adding hardwired 'audioDB' to index");
189 index_names.add("audioDB");
190 }
191}
192
193
Note: See TracBrowser for help on using the repository browser.