Changeset 3389 for trunk


Ignore:
Timestamp:
2002-08-23T10:10:18+12:00 (22 years ago)
Author:
kjdon
Message:

this now sends back real results

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/service/PhindService.java

    r3377 r3389  
    1919package org.greenstone.gsdl3.service;
    2020
     21import org.greenstone.gsdl3.util.*;
     22
     23import org.greenstone.mgpp.*;
    2124import org.w3c.dom.Document;
    2225import org.w3c.dom.Node;
     
    2427import org.w3c.dom.Text;
    2528
     29import java.util.Vector;
     30import java.util.HashMap;
    2631/**
    2732 * PhindService - the phind phrase browsing service
     
    3237public class PhindService
    3338    extends ServiceModule {
    34 
    3539   
    36     protected Element processService(String name, Element request) {
    37 
    38     if (!name.equals("PhindApplet")) {
    39         System.err.println("PhindService:you have asked for a non-existant service - "+name+"!");
    40         return null;
    41     }
    42     // create dummy response
    43     Element res = doc_.createElement("response");
    44     res.setAttribute("from", "PhindApplet");
    45     Element data = doc_.createElement("content");
    46     Text t = doc_.createTextNode("this is the results for a phind request");
    47     data.appendChild(t);
    48    
    49     res.appendChild(data);
    50 
    51     return res;
    52    
     40    private MGPPWrapper mgpp_src_=null;
     41    private String basepath_ = null;
    /**
     * Creates the service with a new MGPPWrapper set up with the default
     * query parameters: query and return level "Document", at most 5 docs,
     * stemming off, case on.
     */
    public PhindService() {
        MGPPWrapper wrapper = new MGPPWrapper();
        // the default params
        wrapper.setQueryLevel("Document");
        wrapper.setReturnLevel("Document");
        wrapper.setMaxDocs(5);
        wrapper.setStem(false);
        wrapper.setCase(true);
        this.mgpp_src_ = wrapper;
    }
    5451    /** configure the service module
     
    7370    f.setAttribute("name", "PhindApplet");
    7471
    75     // add in teh applet info for the phind applet
    76     String app_info = "<applet CODEBASE='/gsdl3/lib/java' CODE='org.greenstone.applet.phind.Phind.class' ARCHIVE='phind.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library'     VALUE='/gsdl3/library'/> <PARAM NAME='phindcgi' VALUE='/gsdl3/library?a=b&amp;sa=phind'/> <PARAM NAME='collection'   VALUE='mgppdemo'/> <PARAM NAME='classifier'   VALUE='1'/>  <PARAM NAME='orientation'  VALUE='vertical'/> <PARAM NAME='depth'        VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop'     VALUE='/gsdl3/interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize'     VALUE='10'/> <PARAM NAME='blocksize'    VALUE='10'/>The Phind java applet.</applet>";
     72    // add in the applet info for the phind applet
     73    // need to make this dynamic - library names etc
     74    String app_info = "<applet CODEBASE='/gsdl3/lib/java' CODE='org.greenstone.applet.phind.Phind.class' ARCHIVE='phind.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library' VALUE='/gsdl3/library'/> <PARAM NAME='phindcgi' VALUE='/gsdl3/library?a=a&amp;sa=r&amp;sn=Phind'/> <PARAM NAME='collection'   VALUE='mgppdemo'/> <PARAM NAME='classifier' VALUE='1'/>  <PARAM NAME='orientation'  VALUE='vertical'/> <PARAM NAME='depth' VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop' VALUE='/gsdl3/interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize' VALUE='10'/> <PARAM NAME='blocksize'    VALUE='10'/>The Phind java applet.</applet>";
    7775
    7876    Document dom = converter_.getDOM(app_info);
     
    8583    }
    8684
     85    protected Element processService(String name, Element request) {
     86   
     87    if (!name.equals("PhindApplet")) {
     88        System.err.println("PhindService:you have asked for a non-existant service - "+name+"!");
     89        return null;
     90    }
     91    Element param_elem = (Element)GSXML.getChildByTagName(request, "paramList");
     92    HashMap params = GSXML.extractParams(param_elem);
     93   
     94    long first_e = Long.parseLong((String)params.get("pfe"));
     95    long last_e = Long.parseLong((String)params.get("ple"));
     96    long first_l = Long.parseLong((String)params.get("pfl"));
     97    long last_l = Long.parseLong((String)params.get("pll"));
     98    long first_d = Long.parseLong((String)params.get("pfd"));
     99    long last_d = Long.parseLong((String)params.get("pld"));
     100   
     101    long phrase;
     102    String phrase_str = (String)params.get("ppnum");
     103    if (phrase_str == null || phrase_str.equals("")) {
     104        phrase=0;
     105    } else {
     106        phrase = Long.parseLong(phrase_str);
     107    }
     108    String word = (String)params.get("pptext");
     109    String phind_index = (String)params.get("pc");
     110    // the location of the mgpp database files
     111    basepath_ = GSFile.phindBasePath(site_home_, collection_name_, phind_index);
     112
     113    // the result element
     114    Element result = doc_.createElement("response");
     115    String from = GSPath.appendLink(collection_name_, "PhindApplet");
     116    result.setAttribute("from", from);
     117    result.setAttribute("type", "query");
     118
     119    // applet result info must be in appletInfo element
     120    Element applet_data = doc_.createElement("appletData");
     121    result.appendChild(applet_data);
     122    Element phind_data = doc_.createElement("phindData");
     123    applet_data.appendChild(phind_data);
     124
     125
     126    // if we dont know the phrase number, look it up
     127    if (phrase == 0) {
     128        if (word==null || word.equals("")) {
     129        Element error = phindError("no word or phrase");
     130        phind_data.appendChild(error);
     131        return result;
     132        }
     133        phrase = findPhraseNumberFromWord( word);
     134        System.out.println("phind, term number for "+word+" is "+phrase);
     135    }
     136    if (phrase==0) {
     137        // the word is not in the collection
     138        // return a phind error string
     139        Element error = phindError("the term "+word+" is not in the collection");
     140        phind_data.appendChild(error);
     141        return result;
     142    }
     143   
     144    // get the phrase data into the phind_data node
     145    getPhraseData(phind_data, phrase, first_l, last_l,
     146              first_e, last_e,  first_d, last_d);
     147    return result;
     148   
     149   
     150    }// processService
     151   
     152    protected long findPhraseNumberFromWord(String word) {
     153
     154    // set the mgpp index data - we are looking up pword
     155    mgpp_src_.loadIndexData(basepath_, "pword");
     156
     157    mgpp_src_.runQuery(word);
     158
     159    MGPPQueryResult res = mgpp_src_.getQueryResult();
     160    Vector docs = res.getDocs();
     161    if (docs.size()==0) {
     162        // phrase not found
     163        return 0;
     164    }
     165    MGPPDocInfo doc = (MGPPDocInfo)docs.firstElement();
     166    return doc.num_;
     167    }
     168
     169    protected boolean getPhraseData(Element phind_data,
     170                    long phrase, long first_l, long last_l,
     171                    long first_e, long last_e, long first_d,
     172                    long last_d) {
     173
     174    String record = mgpp_src_.getDocument(basepath_, "pdata", "Document",
     175                          phrase);
     176    if (record.equals("")) {
     177        Element error = phindError("somethings gone wrong - we haven't got a record for phrase number "+phrase);
     178        phind_data.appendChild(error);
     179        return false;
     180    }
     181   
     182    System.out.println("record="+record);
     183    // parse the record - its in gordons cryptic form
     184    // ":word:tf:ef:df:el:dl:lf:ll"
     185    // el: e,e,e
     186    // dl: d;f,d;f,
     187    // lf and ll may be null
     188    // l: type,dest, dest; type,dest,dest
     189
     190    // ignore everything up to and including first colon (has
     191    // <Document>3505: at the start)
     192    record = record.substring(record.indexOf(':')+1);
     193
     194    // split on ':'
     195    String [] fields = record.split(":");
     196    String word = fields[0];
     197    String tf = fields[1];
     198    String ef = fields[2];
     199    String df = fields[3];
     200
     201   
     202    String expansions = fields[4];
     203    String documents = fields[5];
     204    String lf = "0";
     205    String linklist = "";
     206    if (fields.length > 7) {// have thesaurus stuff
     207        lf =fields[6];
     208        linklist = fields[7];
     209    }
     210   
     211    // the phindData attributes and phrase
     212    phind_data.setAttribute("id", Long.toString(phrase));
     213    phind_data.setAttribute("df", df);
     214    phind_data.setAttribute("ef", ef);
     215    phind_data.setAttribute("lf", lf);
     216    phind_data.setAttribute("tf", tf);
     217    GSXML.createTextElement(doc_, "phrase", word);
     218
     219    addExpansionList(phind_data, expansions, word, ef, first_e, last_e);
     220    addDocumentList(phind_data, documents, word, df, first_d, last_d);
     221    if (!lf.equals("0")) {
     222        System.out.println("adding thesaurus stuff");
     223        addThesaurusList(phind_data, linklist, word, lf, first_l, last_l);
     224    }
     225    return true;
     226    }
     227
     228    protected boolean addExpansionList( Element phind_data, String record,
     229                       String word,
     230                       String freq,
     231                       long first, long last) {
     232
     233    Element expansion_list = doc_.createElement("expansionList");
     234    phind_data.appendChild(expansion_list);
     235    expansion_list.setAttribute("length", freq);
     236    expansion_list.setAttribute("start", Long.toString(first));
     237    expansion_list.setAttribute("end", Long.toString(last));
     238
     239    // get the list of strings
     240    String [] expansions = record.split(",");
     241    int length = expansions.length;
     242    if (length < last) last = length;
     243    for (long i = first; i < last; i++) {
     244        long num  = Long.parseLong(expansions[(int)i]);
     245        Element expansion = getExpansion( num, word);
     246        expansion.setAttribute("num", Long.toString(i));
     247        expansion_list.appendChild(expansion);
     248    }
     249    return true;
     250    }
     251   
     252    protected Element getExpansion(long phrase_num,
     253                   String orig_phrase) {
     254   
     255    // look up the phrase in the pdata thingy
     256    String record = mgpp_src_.getDocument(basepath_, "pdata", "Document",
     257                          phrase_num);
     258
     259    if (record ==null || record.equals("")) return null;
     260
     261    // ignore everything up to and including first colon
     262    record = record.substring(record.indexOf(':')+1);
     263
     264    String [] fields = record.split(":");
     265    String phrase = fields[0];
     266    String tf = fields[1];
     267    //String ef = fields[2]; dont use this
     268    String df = fields[3];
     269
     270    Element expansion = doc_.createElement("expansion");
     271    expansion.setAttribute("tf", tf);
     272    expansion.setAttribute("df", df);
     273    expansion.setAttribute("id", Long.toString(phrase_num));
     274
     275    // get teh suffix and prefix
     276    String [] ends = splitPhraseOnWord(phrase, orig_phrase);
     277    if (!ends[0].equals("")) {
     278        expansion.appendChild(GSXML.createTextElement(doc_, "prefix", ends[0]));
     279    }
     280    if (!ends[1].equals("")) {
     281        expansion.appendChild(GSXML.createTextElement(doc_, "suffix", ends[1]));
     282    }
     283
     284    return expansion;
     285
     286    }
     287
     288    protected boolean addDocumentList(Element phind_data, String record,
     289                      String word,
     290                      String freq,
     291                      long first, long last) {
     292
     293    Element document_list = doc_.createElement("documentList");
     294    phind_data.appendChild(document_list);
     295    document_list.setAttribute("length", freq);
     296    document_list.setAttribute("start", Long.toString(first));
     297    document_list.setAttribute("end", Long.toString(last));
     298
     299    // get the list of doc,freq
     300    String [] doc_freqs = record.split(";");
     301    int length = doc_freqs.length;
     302    if (length<last) last=length;
     303
     304    for (long i = first; i < last; i++) {
     305        String doc_elem = doc_freqs[(int)i];
     306        int p = doc_elem.indexOf(',');
     307        long doc_num;
     308        String doc_freq;
     309        if (p == -1) { // there is no freq in the record
     310        doc_num =Long.parseLong(doc_elem);
     311        doc_freq = "1";
     312        } else {
     313        doc_num = Long.parseLong(doc_elem.substring(0,p));
     314        doc_freq = doc_elem.substring(p+1);
     315        }
     316        Element document = getDocument( doc_num);
     317        document.setAttribute("freq", doc_freq);
     318        document.setAttribute("num", Long.toString(i));
     319        document_list.appendChild(document);
     320    }
     321
     322   
     323    return true;
     324    }
     325
     326
     327    protected Element getDocument(long doc_num) {
     328   
     329    // look up the phrase in the docs thingy
     330    String record = mgpp_src_.getDocument(basepath_, "docs", "Document",
     331                          doc_num);
     332   
     333    if (record ==null || record.equals("")) return null;
     334    System.out.println("doc record:"+record);
     335   
     336    // ignore everything up to and including first \t
     337    record = record.substring(record.indexOf('\t')+1);
     338
     339    String [] fields = record.split("\t");
     340    String hash = fields[0];
     341    String title = fields[1];
     342
     343    Element d = doc_.createElement("document");
     344    d.setAttribute("hash", hash);
     345    d.appendChild(GSXML.createTextElement(doc_, "title", title));
     346   
     347    return d;
     348
     349    }
     350    protected boolean addThesaurusList(Element phind_data, String record,
     351                       String word,
     352                       String freq,
     353                       long first, long last) {
     354
     355
     356    Element thesaurus_list = doc_.createElement("thesaurusList");
     357    phind_data.appendChild(thesaurus_list);
     358    thesaurus_list.setAttribute("length", freq);
     359    thesaurus_list.setAttribute("start", Long.toString(first));
     360    thesaurus_list.setAttribute("end", Long.toString(last));
     361   
     362    System.out.println("record for thesaurus="+record);
     363
     364    // get the list of type,dest,dest
     365    String [] links = record.split(";");
     366    int length = links.length;
     367    long index = 0;
     368    for (int i = 0; i < length; i++) { // go through the entries
     369        String link_info = links[(int)i];
     370        String [] items = link_info.split(",");
     371        // the first entry is teh type
     372        String type = items[0];
     373        for (int j = 1; j<items.length; j++, index++) {
     374        if (index >= first && index < last) { // only output the ones we want
     375            long phrase = Long.parseLong(items[j]);
     376            Element t = getThesaurus(phrase);
     377            t.setAttribute("type", type);
     378            thesaurus_list.appendChild(t);
     379        }
     380        }
     381    }
     382
     383    return true;
     384    }
     385
     386    protected Element getThesaurus(long phrase_num) {
     387
     388    // look up the phrase in the pdata thingy
     389    String record = mgpp_src_.getDocument(basepath_, "pdata", "Document",
     390                          phrase_num);
     391
     392    if (record ==null || record.equals("")) return null;
     393
     394    // ignore everything up to and including first colon
     395    record = record.substring(record.indexOf(':')+1);
     396
     397    String [] fields = record.split(":");
     398    String phrase = fields[0];
     399    String tf = fields[1];
     400    //String ef = fields[2]; dont use this
     401    String df = fields[3];
     402
     403    Element thesaurus = doc_.createElement("thesaurus");
     404    thesaurus.setAttribute("tf", tf);
     405    thesaurus.setAttribute("df", df);
     406    thesaurus.setAttribute("id", Long.toString(phrase_num));
     407    thesaurus.appendChild(GSXML.createTextElement(doc_, "phrase", phrase));
     408    return thesaurus;
     409
     410    }
     411
     412    /** returns an array of two elements - the prefix and the suffix*/
     413    protected String [] splitPhraseOnWord(String phrase, String word) {
     414   
     415    if (word.equals("")) {
     416       
     417        String [] res =  {phrase, ""};
     418        return res;
     419    }
     420    // use 2 so that we only split on the first occurrance. trailing empty strings should be included
     421    String [] result = phrase.split(word, 2);
     422    if (result.length !=2) {
     423        System.out.println("didn't get two substrings!!");
     424    }
     425    return result;
     426   
     427    }
     428
     429    protected Element phindError(String message) {
     430    Element e = doc_.createElement("phindError");
     431    Text t = doc_.createTextNode(message);
     432    e.appendChild(t);
     433    return e;
     434    }
    87435   
    88436}
     437
     438
     439    /*
     440    // CREATE dummy response
     441    Element res = doc_.createElement("response");
     442    res.setAttribute("from", "PhindApplet");
     443    Element data = doc_.createElement("service");
     444    Element app_data = doc_.createElement("appletData");
     445    data.appendChild(app_data);
     446    String phind_info ="<phindData id='2507' tf='19424' ef='1632' df='1843' lf='0'><phrase>FOREST</phrase><expansionList length='1632' start='0' end='10'><expansion num='0' id='177648' tf='2162' df='519'><suffix>MANAGEMENT</suffix></expansion> <expansion num='1' id='177531' tf='1958' df='566'><suffix>PRODUCTS</suffix></expansion> <expansion num='2' id='177469' tf='1328' df='532'><suffix>RESOURCES</suffix></expansion> <expansion num='3' id='177773' tf='943' df='177'><suffix>GENETIC</suffix></expansion> <expansion num='4' id='177335' tf='736' df='258'><prefix>SUSTAINABLE</prefix></expansion> </expansionList><documentList length='1843' start='0' end='10'><document num='0' hash='HASH011fb8a7d8bf781ab3cbb087' freq='363'><title>FO-edu List of Countries 0</title></document><document num='1' hash='HASH27ae41229eb0636849a5be' freq='344' ><title>FO-edu List of Countries 1</title></document><document num='2' hash='HASH0187ef85c9dbf5bf132ea1d1' freq='263'><title>FO-edu List of Countries 2</title></document><document num='3' hash='HASH0125ec9ef67960446f471280' freq='238'><title>FO-edu List of Countries 3</title></document><document num='4' hash='HASH67087f7717eb35050ce1ac' freq='213'><title>FO-edu List of Countries 4</title></document></documentList><thesaurusList><thesaurus num='3' id='36506' tf='0' df='0' type='RT'><phrase>FRANCOPHONE</phrase></thesaurus></thesaurusList></phindData>";
     447   
     448    Node t = converter_.getDOM(phind_info).getDocumentElement();
     449    app_data.appendChild(doc_.importNode(t, true));
     450   
     451    res.appendChild(data);
     452
     453    return res;
     454    */
Note: See TracChangeset for help on using the changeset viewer.