- Timestamp:
- 2006-11-08T16:24:39+13:00 (18 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/service
- Files:
-
- 3 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java
r13124 r13238 20 20 21 21 // Greenstone classes 22 import org.greenstone.mgpp.*; 22 23 import org.greenstone.gsdl3.util.*; 23 24 … … 27 28 import org.w3c.dom.NodeList; 28 29 30 // java classes 31 import java.util.Iterator; 32 import java.util.Set; 33 import java.util.HashMap; 34 import java.util.Map; 35 import java.util.ArrayList; 36 import java.util.Vector; 37 import java.io.File; 38 29 39 import org.apache.log4j.*; 30 40 … … 36 46 37 47 public class GS2MGPPSearch 38 extends Abstract MGPPSearch48 extends AbstractGS2FieldSearch 39 49 { 40 protected GDBMWrapper gdbm_src = null; 41 static Category logger = Category.getInstance(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName()); 50 private MGPPWrapper mgpp_src=null; 51 52 static Category logger = Category.getInstance(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName()); 42 53 43 54 /** constructor */ 44 55 public GS2MGPPSearch() 45 56 { 46 this. gdbm_src = new GDBMWrapper();57 this.mgpp_src = new MGPPWrapper(); 47 58 } 48 59 49 60 public void cleanUp() { 50 61 super.cleanUp(); 51 this. gdbm_src.closeDatabase();62 this.mgpp_src.unloadIndexData(); 52 63 } 53 64 … … 59 70 } 60 71 61 // Open GDBM database for querying 62 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem); 63 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) { 64 logger.error("Could not open GDBM database!"); 65 return false; 66 } 72 // the default level is also the level which gdbm is expecting 73 // this must not be overwritten 74 this.mgpp_src.setReturnLevel(this.default_level); 75 // return term info 76 this.mgpp_src.setReturnTerms(true); 77 // set the default - this may be overwritten by query params 78 this.mgpp_src.setQueryLevel(this.default_level); 79 67 80 return true; 68 81 } 69 /** returns the document type of the doc that the specified node 70 belongs to. should be one of 71 GSXML.DOC_TYPE_SIMPLE, 72 GSXML.DOC_TYPE_PAGED, 73 GSXML.DOC_TYPE_HIERARCHY 74 */ 75 protected String getDocType(String node_id){ 76 DBInfo info = this.gdbm_src.getInfo(node_id); 77 if (info == null) { 78 return GSXML.DOC_TYPE_SIMPLE; 79 } 80 String doc_type = info.getInfo("doctype"); 81 if (!doc_type.equals("")&&!doc_type.equals("doc")) { 82 return doc_type; 83 } 84 85 String top_id = OID.getTop(node_id); 86 boolean is_top = (top_id.equals(node_id) ? true : false); 87 88 String children = info.getInfo("contains"); 89 boolean is_leaf = (children.equals("") ? true : false); 90 91 if (is_top && is_leaf) { // a single section document 92 return GSXML.DOC_TYPE_SIMPLE; 93 } 94 95 // now we just check the top node 96 if (!is_top) { // we need to look at the top info 97 info = this.gdbm_src.getInfo(top_id); 98 } 99 if (info == null) { 100 return GSXML.DOC_TYPE_HIERARCHY; 101 } 102 103 String childtype = info.getInfo("childtype"); 104 if (childtype.equals("Paged")) { 105 return GSXML.DOC_TYPE_PAGED; 106 } 107 return GSXML.DOC_TYPE_HIERARCHY; 108 109 } 110 111 /** returns true if the node has child nodes */ 112 protected boolean hasChildren(String node_id){ 113 DBInfo info = this.gdbm_src.getInfo(node_id); 114 if (info == null) { 115 return false; 116 } 117 String contains = info.getInfo("contains"); 118 if (contains.equals("")) { 119 return false; 120 } 82 83 /** add in the mgpp specific params to TextQuery */ 84 protected void addCustomQueryParams(Element param_list, String lang) 85 { 86 super.addCustomQueryParams(param_list, lang); 87 createParameter(RANK_PARAM, param_list, lang); 88 } 89 90 protected boolean setUpQueryer(HashMap params) { 91 // set up mgpp_src 92 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, this.default_index); 93 this.mgpp_src.loadIndexData(indexdir); 94 95 // set up the query params 96 Set entries = params.entrySet(); 97 Iterator i = entries.iterator(); 98 while (i.hasNext()) { 99 Map.Entry m = (Map.Entry)i.next(); 100 String name = (String)m.getKey(); 101 String value = (String)m.getValue(); 102 103 if (name.equals(CASE_PARAM)) { 104 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); 105 this.mgpp_src.setCase(val); 106 } else if (name.equals(STEM_PARAM)) { 107 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); 108 this.mgpp_src.setStem(val); 109 } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) { 110 int docs = Integer.parseInt(value); 111 this.mgpp_src.setMaxDocs(docs); 112 } else if (name.equals(LEVEL_PARAM)) { 113 this.mgpp_src.setQueryLevel(value); 114 } else if (name.equals(MATCH_PARAM)) { 115 int mode; 116 if (value.equals(MATCH_PARAM_ALL)) mode=1; 117 else mode=0; 118 this.mgpp_src.setMatchMode(mode); 119 } else if (name.equals(RANK_PARAM)) { 120 if (value.equals(RANK_PARAM_RANK)) { 121 this.mgpp_src.setSortByRank(true); 122 } else if (value.equals(RANK_PARAM_NONE)) { 123 this.mgpp_src.setSortByRank(false); 124 } 125 } // ignore any others 126 } 127 121 128 return true; 122 129 } 123 130 124 /** returns true if the node has a parent */ 125 protected boolean hasParent(String node_id){ 126 String parent = OID.getParent(node_id); 127 if (parent.equals(node_id)) { 128 return false; 131 protected Object runQuery(String query) { 132 this.mgpp_src.runQuery(query); 133 MGPPQueryResult mqr= this.mgpp_src.getQueryResult(); 134 return mqr; 135 136 } 137 138 protected long numDocsMatched(Object query_result) { 139 return ((MGPPQueryResult)query_result).getTotalDocs(); 140 } 141 142 protected String [] getDocIDs(Object query_result) { 143 144 Vector docs = ((MGPPQueryResult)query_result).getDocs(); 145 String [] doc_nums = new String [docs.size()]; 146 for (int d = 0; d < docs.size(); d++) { 147 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_)); 148 } 149 return doc_nums; 150 } 151 152 protected String [] getDocRanks(Object query_result) { 153 154 Vector docs = ((MGPPQueryResult)query_result).getDocs(); 155 String [] doc_ranks = new String [docs.size()]; 156 for (int d = 0; d < docs.size(); d++) { 157 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_); 158 } 159 return doc_ranks; 160 } 161 162 protected boolean addTermInfo(Element term_list, HashMap params, 163 Object query_result) { 164 165 String query_level = (String)params.get(LEVEL_PARAM); // the current query level 166 167 Vector terms = ((MGPPQueryResult)query_result).getTerms(); 168 for (int t = 0; t < terms.size(); t++) { 169 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t); 170 171 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 172 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); 173 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_); 174 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); 175 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); 176 String field = term_info.tag_; 177 if (field.equals(query_level)) { 178 // ignore 179 field = ""; 180 } 181 term_elem.setAttribute(FIELD_ATT, field); 182 183 Vector equiv_terms = term_info.equiv_terms_; 184 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER); 185 term_elem.appendChild(equiv_term_list); 186 187 for (int et = 0; et < equiv_terms.size(); et++) { 188 String equiv_term = (String) equiv_terms.get(et); 189 190 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM); 191 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term); 192 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, ""); 193 equiv_term_elem.setAttribute(FREQ_ATT, ""); 194 equiv_term_list.appendChild(equiv_term_elem); 195 } 196 197 term_list.appendChild(term_elem); 129 198 } 130 199 return true; 131 200 } 132 201 133 /** convert MGPP internal id to Greenstone oid */ 134 protected String MGPPNum2OID(long docnum) 135 { 136 return this.gdbm_src.docnum2OID(docnum); 137 202 203 protected String addFieldInfo(String query, String field) { 204 if (field.equals("") || field.equals("ZZ")) { 205 return query; 206 } 207 return "["+query+"]:"+field; 208 } 209 protected void addQueryElem(StringBuffer final_query, String query, 210 String field, String combine) { 211 212 String comb=""; 213 if (final_query.length()>0) { 214 comb = " "+combine+" "; 215 } 216 final_query.append(comb+addFieldInfo(query,field)); 217 } 218 219 protected String addStemOptions(String query, String stem, 220 String casef, String accent) { 221 String mods = "#"; 222 if (casef != null) { 223 if (casef.equals("1")) { 224 mods += "i"; 225 } else { 226 mods += "c"; 227 } 228 } 229 if (stem != null) { 230 if (stem.equals("1")) { 231 mods += "s"; 232 } else { 233 mods+= "u"; 234 } 235 } 236 if (accent != null) { 237 if (accent.equals("1")) { 238 mods += "f"; 239 } else { 240 mods += "a"; 241 } 242 } 243 244 StringBuffer temp = new StringBuffer(); 245 String [] terms = query.split(" "); 246 for (int i=0; i<terms.length; i++) { 247 String t = terms[i].trim(); 248 // what is the TX bit about??? 249 if (!t.equals("") && !t.equals("TX")) { 250 temp.append(" "+t+mods); 251 } 252 } 253 return temp.toString(); 138 254 } 139 255 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java
r13124 r13238 20 20 21 21 // Greenstone classes 22 import org.greenstone.mg.*; 22 23 import org.greenstone.gsdl3.util.*; 23 24 … … 27 28 import org.w3c.dom.NodeList; 28 29 30 // java 31 import java.util.Vector; 32 import java.util.ArrayList; 33 import java.util.HashMap; 34 import java.util.Map; 35 import java.util.Set; 36 import java.util.Iterator; 37 import java.io.File; 38 29 39 import org.apache.log4j.*; 30 40 … … 36 46 37 47 public class GS2MGSearch 38 extends Abstract MGSearch48 extends AbstractGS2Search 39 49 { 40 protected GDBMWrapper gdbm_src = null; 50 51 protected MGWrapper mg_src = null; 52 41 53 static Category logger = Category.getInstance(org.greenstone.gsdl3.service.GS2MGSearch.class.getName()); 42 54 … … 45 57 public GS2MGSearch() 46 58 { 47 this. gdbm_src = new GDBMWrapper();59 this.mg_src = new MGWrapper(); 48 60 } 49 61 public void cleanUp() { 50 62 super.cleanUp(); 51 this.gdbm_src.closeDatabase(); 52 } 53 54 /** configure this service */ 55 public boolean configure(Element info, Element extra_info) 63 this.mg_src.unloadIndexData(); 64 } 65 66 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) { 67 // the index info - 68 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER); 69 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 70 int len = indexes.getLength(); 71 // now add even if there is only one 72 for (int i=0; i<len; i++) { 73 Element index = (Element)indexes.item(i); 74 index_ids.add(index.getAttribute(GSXML.NAME_ATT)); 75 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en")); 76 77 } 78 79 } 80 81 /** do the actual query */ 82 protected Element processTextQuery(Element request) 56 83 { 57 if (!super.configure(info, extra_info)){ 58 return false; 59 } 60 61 // Open GDBM database for querying 62 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem); 63 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) { 64 logger.error(" Could not open GDBM database!"); 65 return false; 84 85 // Create a new (empty) result message 86 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 87 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE); 88 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 89 90 // Get the parameters of the request 91 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 92 if (param_list == null) { 93 logger.error("TextQuery request had no paramList."); 94 return result; // Return the empty result 95 } 96 97 // Process the request parameters 98 HashMap params = GSXML.extractParams(param_list, false); 99 100 // Make sure a query has been specified 101 String query = (String) params.get(QUERY_PARAM); 102 if (query == null || query.equals("")) { 103 return result; // Return the empty result 104 } 105 106 // If an index hasn't been specified, use the default 107 String index = (String) params.get(INDEX_PARAM); 108 if (index == null) { 109 index = this.default_index; 110 } 111 112 // The location of the MG index and text files 113 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG 114 String textdir = GSFile.collectionTextPath(this.index_stem); 115 String indexpath = GSFile.collectionIndexPath(this.index_stem, index); 116 this.mg_src.setIndex(indexpath); 117 System.err.println("index path = "+indexpath); 118 // set the mg query parameters to the values the user has specified 119 setStandardQueryParams(params); 120 this.mg_src.runQuery(basedir, textdir, query); 121 MGQueryResult mqr = this.mg_src.getQueryResult(); 122 if (mqr.isClear()) { 123 // something has gone wrong 124 GSXML.addError(this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM); 125 return result; 126 } 127 long totalDocs = mqr.getTotalDocs(); 128 129 // Get the docnums out, and convert to HASH ids 130 Vector docs = mqr.getDocs(); 131 if (docs.size() == 0) { 132 logger.error("No results found...\n"); 133 } 134 135 // Create a metadata list to store information about the query results 136 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 137 result.appendChild(metadata_list); 138 139 // Add a metadata element specifying the number of matching documents 140 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched 141 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs); 142 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query 143 GSXML.addMetadata(this.doc, metadata_list, "query", query); 144 145 if (docs.size() > 0) { 146 // Create a document list to store the matching documents, and add them 147 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 148 result.appendChild(document_list); 149 for (int d = 0; d < docs.size(); d++) { 150 long docnum = ((MGDocInfo) docs.elementAt(d)).num_; 151 float rank = ((MGDocInfo) docs.elementAt(d)).rank_; 152 String doc_id = internalNum2OID(docnum); 153 Element doc_node = createDocNode(doc_id, Float.toString(rank)); 154 document_list.appendChild(doc_node); 155 } 156 } 157 158 // Create a term list to store the term information, and add it 159 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER); 160 result.appendChild(term_list); 161 Vector terms = mqr.getTerms(); 162 for (int t = 0; t < terms.size(); t++) { 163 MGTermInfo term_info = (MGTermInfo) terms.get(t); 164 165 String term = term_info.term_; 166 int stem_method = term_info.stem_method_; 167 Vector equiv_terms = term_info.equiv_terms_; 168 169 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 170 term_elem.setAttribute(GSXML.NAME_ATT, term); 171 term_elem.setAttribute(STEM_ATT, "" + stem_method); 172 173 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER); 174 term_elem.appendChild(equiv_term_list); 175 176 long total_term_freq = 0; 177 for (int et = 0; et < equiv_terms.size(); et++) { 178 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et); 179 180 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM); 181 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_); 182 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_); 183 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_); 184 equiv_term_list.appendChild(equiv_term_elem); 185 186 total_term_freq += equiv_term_info.term_freq_; 187 } 188 189 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq); 190 term_list.appendChild(term_elem); 191 } 192 return result; 193 } 194 195 // should probably use a list rather than map 196 protected boolean setStandardQueryParams(HashMap params) 197 { 198 // set the default ones 199 this.mg_src.setReturnTerms(true); 200 this.mg_src.setCase(true); // turn casefolding on by default 201 Set entries = params.entrySet(); 202 Iterator i = entries.iterator(); 203 while (i.hasNext()) { 204 Map.Entry m = (Map.Entry)i.next(); 205 String name = (String)m.getKey(); 206 String value = (String)m.getValue(); 207 208 if (name.equals(CASE_PARAM)) { 209 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false); 210 this.mg_src.setCase(val); 211 } 212 else if (name.equals(STEM_PARAM)) { 213 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false); 214 this.mg_src.setStem(val); 215 } 216 else if (name.equals(MATCH_PARAM)) { 217 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0); 218 this.mg_src.setMatchMode(mode); 219 } 220 else if (name.equals(MAXDOCS_PARAM)) { 221 int docs = Integer.parseInt(value); 222 this.mg_src.setMaxDocs(docs); 223 } // ignore any others 66 224 } 67 225 return true; 68 226 } 69 /** returns the document type of the doc that the specified node 70 belongs to. should be one of 71 GSXML.DOC_TYPE_SIMPLE, 72 GSXML.DOC_TYPE_PAGED, 73 GSXML.DOC_TYPE_HIERARCHY 74 */ 75 protected String getDocType(String node_id){ 76 DBInfo info = this.gdbm_src.getInfo(node_id); 77 if (info == null) { 78 return GSXML.DOC_TYPE_SIMPLE; 79 } 80 String doc_type = info.getInfo("doctype"); 81 if (!doc_type.equals("")&&!doc_type.equals("doc")) { 82 return doc_type; 83 } 84 85 String top_id = OID.getTop(node_id); 86 boolean is_top = (top_id.equals(node_id) ? true : false); 87 88 String children = info.getInfo("contains"); 89 boolean is_leaf = (children.equals("") ? true : false); 90 91 if (is_top && is_leaf) { // a single section document 92 return GSXML.DOC_TYPE_SIMPLE; 93 } 94 95 // now we just check the top node 96 if (!is_top) { // we need to look at the top info 97 info = this.gdbm_src.getInfo(top_id); 98 } 99 if (info == null) { 100 return GSXML.DOC_TYPE_HIERARCHY; 101 } 102 103 String childtype = info.getInfo("childtype"); 104 if (childtype.equals("Paged")) { 105 return GSXML.DOC_TYPE_PAGED; 106 } 107 return GSXML.DOC_TYPE_HIERARCHY; 108 109 } 110 111 /** returns true if the node has child nodes */ 112 protected boolean hasChildren(String node_id){ 113 DBInfo info = this.gdbm_src.getInfo(node_id); 114 if (info == null) { 115 return false; 116 } 117 String contains = info.getInfo("contains"); 118 if (contains.equals("")) { 119 return false; 120 } 121 return true; 122 } 123 124 /** returns true if the node has a parent */ 125 protected boolean hasParent(String node_id){ 126 String parent = OID.getParent(node_id); 127 if (parent.equals(node_id)) { 128 return false; 129 } 130 return true; 131 } 132 133 /** convert MG internal id to Greenstone oid */ 134 protected String MGNum2OID(long docnum) 135 { 136 return this.gdbm_src.docnum2OID(docnum); 137 138 } 227 139 228 140 229 }
Note:
See TracChangeset
for help on using the changeset viewer.