Changeset 3755


Ignore:
Timestamp:
2003-02-21T11:47:33+13:00 (21 years ago)
Author:
mdewsnip
Message:

Alpha version of service for collection searching using MG.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java

    r3649 r3755  
    1919package org.greenstone.gsdl3.service;
    2020
     21// greenstone classes
     22import org.greenstone.mg.*;
     23import org.greenstone.gdbm.*;
    2124import org.greenstone.gsdl3.util.*;
    2225
     26// xml classes
    2327import org.w3c.dom.Document;
    24 import org.w3c.dom.Node;
     28import org.w3c.dom.Element;
     29import org.w3c.dom.Node;
     30import org.w3c.dom.NodeList;
    2531import org.w3c.dom.Text;
    26 import org.w3c.dom.Element;
    27 import org.w3c.dom.NodeList;
     32
     33// general java classes
     34import java.io.File;
     35import java.util.HashMap;
     36import java.util.Iterator;
     37import java.util.Map;
     38import java.util.Set;
     39import java.util.Vector;
     40
    2841/**
    2942 *
     
    3649
    3750    // the services on offer
     51    // these strings must match what is found in the properties file
    3852    private static final String TEXT_QUERY_SERVICE = "TextQuery";
    39    
    40 
    41     /** creates a display element containing all the text strings needed to display the service page, in the language specified */
    42     protected Element createServiceDisplay(String service, String lang) {
    43     Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
    44     display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM,  getTextString(service+".name", lang)));
    45     display.appendChild(GSXML.createTextElement(doc_,  GSXML.DISPLAY_SUBMIT_ELEM, getTextString(service+".submit", lang)));
    46 
    47     Element param;
    48 
    49     return display;
    50    
    51     }
    52    
     53
     54    // params used
     55    private static final String INDEX_PARAM = "index";
     56    private static final String CASE_PARAM = "case";
     57    private static final String STEM_PARAM = "stem";
     58    private static final String MATCH_PARAM = "matchMode";
     59    private static final String MATCH_PARAM_ALL = "all";
     60    private static final String MATCH_PARAM_SOME = "some";
     61    private static final String RANK_PARAM = "sortBy";
     62    private static final String RANK_PARAM_RANK = "rank";
     63    private static final String RANK_PARAM_NONE = "natural";
     64    private static final String MAXDOCS_PARAM = "maxDocs";
     65    private static final String BOOLEAN_PARAM_ON = "1";
     66    private static final String BOOLEAN_PARAM_OFF = "0";
     67    private static final String QUERY_PARAM = "query";
     68
     69    // elements used in the config file that are specific to this class
     70    private static final String DEFAULT_INDEX_ELEM = "defaultIndex";
     71    private static final String INDEX_ELEM = "index";
     72
     73    // !! Hack !!
     74    // public static final String TERM_ELEM = "term";
     75
     76    private MGWrapper mg_src_ = null;
     77    private GDBMWrapper gdbm_src_ = null;
     78
     79    private String default_index_ = null;
     80
     81    private Element config_info_ = null;
     82
     83
     84    /** Constructor: creates the MG and GDBM wrapper objects. The GDBM database itself is opened later, in configure(). */
     85    public GS2MGSearch() {
     86    mg_src_ = new MGWrapper();
     87    gdbm_src_ = new GDBMWrapper();
     88    }
     89
     90
    5391    /** configure this service */
    54     public boolean configure(Element info) {
    55 
     92    public boolean configure(Element info)
     93    {
    5694    System.out.println("configuring GS2MGSearch");
     95    config_info_ = info;
     96
     97    // get the default index out of <defaultIndex> (buildConfig.xml)
     98    Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
     99    if (def != null) {
     100        default_index_ = def.getAttribute(GSXML.NAME_ATT);
     101    }
     102    if (default_index_ == null || default_index_.equals("")) {
     103        System.err.println("Error: default index not specified!");
     104        return false;
     105    }
    57106
    58107    Element e = null;
     
    69118    e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
    70119    e.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
     120    Element param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
     121    createTextQueryParamList(param_list, false, null);
     122    e.appendChild(param_list);
    71123    service_info_map_.put(TEXT_QUERY_SERVICE, e);
    72    
     124
     125    // Open GDBM database for querying
     126    String gdbm_db_file = GSFile.GDBMDatabaseFile(site_home_, cluster_name_);
     127    if (gdbm_src_.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
     128        return true;
     129    }
     130    else {
     131        System.err.println("Error: Could not open gdbm database!");
     132        return false;
     133    }
     134    }
     135
     136
     137    /** Creates the param element for the parameter called name and appends it to param_list. If display is true the display (text strings) form is built for language lang, otherwise the description form. Nothing is appended for an unrecognised name, or for the index param when at most one index is configured. */
     138    protected void createParameter(String name, Element param_list, boolean display, String lang)
     139    {
     140    Element param=null;
     141
     142    if (name.equals(INDEX_PARAM)) {
     143        // the index info - read from config file (config_info_ is stored by configure())
     144        Element index_list = (Element)GSXML.getChildByTagName(config_info_, INDEX_ELEM+GSXML.LIST_MODIFIER); // NOTE(review): not null-checked, unlike the other getChildByTagName call sites in this class - a config without an index list would presumably throw NullPointerException on the next line; confirm
     145        NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
     146        int len = indexes.getLength();
     147        if (len > 1) { // add index param to list only if more than one index specified
     148        String [] inds  = new String[len];
     149        for (int i=0; i<len; i++) {
     150            inds[i] = ((Element)indexes.item(i)).getAttribute(GSXML.NAME_ATT);
     151        }
     152        if (display) {
     153            // use the same index names for now - should get these out of the config info
     154            param = GSXML.createParameterDisplay(doc_, INDEX_PARAM, getTextString("param."+INDEX_PARAM, lang), inds, inds);
     155        } else {
     156            param = GSXML.createParameter(doc_, INDEX_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, default_index_, inds);
     157        }
     158        }
     159    }
     160    else if (name.equals(CASE_PARAM)) {
     161        if (display) {
     162        String[] bool_ops = {"0", "1"}; // same values as BOOLEAN_PARAM_OFF / BOOLEAN_PARAM_ON
     163        String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
     164        param = GSXML.createParameterDisplay(doc_, CASE_PARAM, getTextString("param."+CASE_PARAM, lang),  bool_ops, bool_texts);
     165        } else {
     166        param = GSXML.createParameter(doc_, CASE_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
     167        }
     168    }
     169    else if (name.equals(STEM_PARAM)) {
     170        if (display) {
     171        String[] bool_ops = {"0", "1"}; // same values as BOOLEAN_PARAM_OFF / BOOLEAN_PARAM_ON
     172        String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
     173        param = GSXML.createParameterDisplay(doc_, STEM_PARAM, getTextString("param."+STEM_PARAM, lang),  bool_ops, bool_texts);
     174        } else {
     175        param = GSXML.createParameter(doc_, STEM_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
     176        }
     177    }
     178    else if (name.equals(MATCH_PARAM)) {
     179        String[] vals = {MATCH_PARAM_ALL, MATCH_PARAM_SOME};
     180        if (display) {
     181        String[] val_texts = {getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_ALL, lang),getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_SOME, lang)};
     182
     183        param = GSXML.createParameterDisplay(doc_, MATCH_PARAM, getTextString("param."+MATCH_PARAM, lang), vals, val_texts);
     184        } else {
     185        param = GSXML.createParameter(doc_, MATCH_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, MATCH_PARAM_ALL, vals);
     186        }   
     187    }
     188    else if (name.equals(RANK_PARAM)) {
     189        String[] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE };
     190        if (display) {
     191        String[] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang),
     192                      getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang)};
     193       
     194        param = GSXML.createParameterDisplay(doc_, RANK_PARAM, getTextString("param."+RANK_PARAM, lang), vals1, vals1_texts);
     195        } else {
     196        param = GSXML.createParameter(doc_, RANK_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, RANK_PARAM_RANK, vals1 );
     197        }
     198    }
     199    else if (name.equals(MAXDOCS_PARAM)) {
     200        if (display) {
     201        param = GSXML.createParameterDisplay(doc_, MAXDOCS_PARAM, getTextString("param."+MAXDOCS_PARAM, lang), null, null);
     202        } else {
     203        param = GSXML.createParameter(doc_, MAXDOCS_PARAM, GSXML.PARAM_TYPE_INTEGER, "10", null); // "10" is presumably the default value (same argument position as the defaults passed above) - confirm
     204        }
     205    }
     206    else if (name.equals(QUERY_PARAM)) {
     207        if (display) {
     208        param = GSXML.createParameterDisplay(doc_, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), null, null);
     209        } else {
     210        param = GSXML.createParameter(doc_, QUERY_PARAM, GSXML.PARAM_TYPE_STRING, null, null);
     211        }
     212    }
     213
     214    // add the param to the list (param is still null for an unknown name, or for the index param when there is at most one index)
     215    if (param != null) {
     216        param_list.appendChild(param);
     217    }
     218    }
     219
     220
     221    /** Creates all the TextQuery params and appends them to param_list.
     222     * If display is true it creates the text strings version (in language lang);
     223     * otherwise it creates the description version. Always returns true.
     224     */
     225    protected boolean createTextQueryParamList(Element param_list, boolean display, String lang)
     226    {
     227    // the order they are specified here is the order they appear on
     228    // the query form
     229    createParameter(INDEX_PARAM, param_list, display, lang);
     230    createParameter(CASE_PARAM, param_list, display, lang);
     231    createParameter(STEM_PARAM, param_list, display, lang);
     232    createParameter(MATCH_PARAM, param_list, display, lang);
     233    createParameter(RANK_PARAM, param_list, display, lang);
     234    createParameter(MAXDOCS_PARAM, param_list, display, lang);
     235    createParameter(QUERY_PARAM, param_list, display, lang);
    73236    return true;
    74237    }
    75   /** process a text query */
    76     protected Element processTextQuery(Element request) {
     238
     239
     240    /** Creates a display element containing all the text strings needed to display the service page, in the language specified: the service name and submit strings, plus the param display entries when service is the TextQuery service. */
     241    protected Element createServiceDisplay(String service, String lang) {
     242    Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
     243    display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM,
     244                            getTextString(service+".name", lang)));
     245    display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_SUBMIT_ELEM,
     246                            getTextString(service+".submit", lang)));
     247
     248    // now need to add in the params - they are appended directly to the display element (display=true gives the text strings version)
     249    if (service.equals(TEXT_QUERY_SERVICE)) {
     250        createTextQueryParamList(display, true, lang);
     251    }
     252
     253    return display;
     254    }
     255
     256
     257    /** Processes a text query: reads the params from the request's param list, runs the query through MG, and returns a response element whose content holds a metadata list (numMatchingDocs plus one entry per query term) and a document list. A response with no content is returned when the request has no param list or no query param. */
     258    protected Element processTextQuery(Element request)
     259    {
    77260    Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
    78     result.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
    79    
    80     // dummy result
    81     Text t = null;
    82     t = doc_.createTextNode(TEXT_QUERY_SERVICE+" result... ");
    83     result.appendChild(t);
     261    String from = GSPath.appendLink(cluster_name_, TEXT_QUERY_SERVICE);
     262    result.setAttribute(GSXML.FROM_ATT, from);
     263    result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_QUERY);
     264    // result.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
     265
     266    // get param list
     267    Element param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
     268    if (param_list==null) {
     269        System.err.println("GS2MGSearch, TextQuery Error: no param list in request!");
     270        return result; // empty result
     271    }
     272    HashMap params = GSXML.extractParams(param_list);
     273    String query = (String)params.get(QUERY_PARAM);
     274    if (query == null) {
     275        // no query, no result
     276        return result;
     277    }
     278
     279    String index = (String) params.get(INDEX_PARAM);
     280    if (index == null) { // if it is not present, use the default index
     281        index = default_index_;
     282    }
     283    // System.out.println("Index: " + index);
     284
     285    // now set up the mg stuff
     286    String basedir = GSFile.collectionBaseDir(site_home_, cluster_name_) +
     287                       File.separatorChar;  // Needed for MG
     288    String textdir = GSFile.collectionTextPath(cluster_name_);
     289    String indexpath = GSFile.collectionIndexPath(cluster_name_, index);
     290    System.out.println("Base directory: " + basedir);
     291    System.out.println("Text directory: " + textdir);
     292    System.out.println("Index path: " + indexpath);
     293
     294    // set the mg query parameters to the values the user has specified
     295    setStandardQueryParams(params);
     296    mg_src_.setIndex(indexpath);
     297
     298    System.out.println("GS2MGSearch, query string: " + query);
     299    mg_src_.runQuery(basedir + File.separatorChar, textdir, query); // NOTE(review): basedir already ends with File.separatorChar (appended where it is built above), so the separator is doubled here - confirm this is intended
     300    MGQueryResult mqr = mg_src_.getQueryResult();
     301    long totalDocs = mqr.getTotalDocs();
     302    // System.out.println("Matching documents: " + totalDocs);
     303
     304    // get the docnums out, and convert to HASH ids
     305    Vector docs = mqr.getDocs();
     306    if (docs.size() == 0) {
     307        // no docs found - only logged; execution continues so the metadata and an empty document list are still returned
     308        System.out.println("No results found...\n");
     309        // return result;
     310    }
     311
     312    // create the content element that will hold the metadata list and the document list
     313    Element c = doc_.createElement(GSXML.CONTENT_ELEM);
     314    result.appendChild(c);
     315
     316    // Create a metadata list to store information about the query results
     317    Element metadata_list = GSXML.addMetaList(doc_, c);
     318
     319    // Add a metadata element to the result content specifying the number of matches
     320    GSXML.addMetadata(doc_, metadata_list, "numMatchingDocs", "" + totalDocs);
     321    System.out.println("Metadata list: " + converter_.getString(metadata_list));
     322
     323    // get the terms out, and add to query result metadata
     324    Vector terms = mqr.getTerms();
     325    if (terms.size() == 0) {
     326        // no terms found
     327        System.out.println("No query term information available...\n");
     328    }
     329    else {
     330        for (int t = 0; t < terms.size(); t++) {
     331        String term = ((MGTermInfo) terms.elementAt(t)).term_;
     332        long term_freq = ((MGTermInfo) terms.elementAt(t)).term_freq_;
     333        Element metadata_elem = doc_.createElement(GSXML.METADATA_ELEM); // one metadata element per term: type "term", name = the term, value = its frequency
     334        metadata_elem.setAttribute(GSXML.TYPE_ATT, "term");
     335        metadata_elem.setAttribute(GSXML.NAME_ATT, term);
     336        metadata_elem.setAttribute(GSXML.VALUE_ATT, "" + term_freq);
     337        metadata_list.appendChild(metadata_elem);
     338        }
     339    }
     340
     341    Element document_list = doc_.createElement(GSXML.DOCUMENT_ELEM+GSXML.LIST_MODIFIER);
     342    c.appendChild(document_list);
     343    // add each document
     344    for (int d = 0; d < docs.size(); d++) {
     345        long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
     346        String id = gdbm_src_.docnum2Oid(docnum); // look up the id for this MG document number in the GDBM database opened by configure()
     347        Node no = GSXML.createDocumentElement(doc_, id);
     348        document_list.appendChild(no);
     349    }
     350
    84351    return result;
    85352    }
    86353
    87354
    88 }   
     355    // Applies the query params in the given map to mg_src_; params it does not recognise are ignored. Always returns true. (should probably use a list rather than map)
     356    protected boolean setStandardQueryParams(HashMap params)
     357    {
     358    // set the default ones - term information is always requested, whatever the params say
     359    mg_src_.setReturnTerms(true);
     360    Set entries = params.entrySet();
     361    Iterator i = entries.iterator();
     362    while (i.hasNext()) {
     363        Map.Entry m = (Map.Entry)i.next();
     364        String name = (String)m.getKey();
     365        String value = (String)m.getValue();
     366
     367        if (name.equals(CASE_PARAM)) {
     368        boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false); // any value other than BOOLEAN_PARAM_ON counts as off
     369        mg_src_.setCase(val);
     370        }
     371        else if (name.equals(STEM_PARAM)) {
     372        boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false); // any value other than BOOLEAN_PARAM_ON counts as off
     373        mg_src_.setStem(val);
     374        }
     375        else if (name.equals(MATCH_PARAM)) {
     376        int mode;
     377        if (value.equals(MATCH_PARAM_ALL)) mode = 1; // "all" maps to mode 1
     378        else mode = 0; // every other value, including "some", maps to mode 0
     379        mg_src_.setMatchMode(mode);
     380        }
     381        else if (name.equals(RANK_PARAM)) {
     382        if (value.equals(RANK_PARAM_RANK)) {
     383            mg_src_.setSortByRank(true);
     384        } else if (value.equals(RANK_PARAM_NONE)) {
     385            mg_src_.setSortByRank(false);
     386        } // any other value leaves the current sort setting untouched
     387        }
     388        else if (name.equals(MAXDOCS_PARAM)) {
     389        int docs = Integer.parseInt(value); // NOTE(review): throws NumberFormatException for a non-numeric maxDocs value; nothing here catches it
     390        mg_src_.setMaxDocs(docs);
     391        } // ignore any others
     392    }
     393    return true;
     394    }
     395}
Note: See TracChangeset for help on using the changeset viewer.