Changeset 14483

Show
Ignore:
Timestamp:
06.09.2007 11:19:44 (12 years ago)
Author:
xiao
Message:

make MGPPRetrieveWrapper and MGPPSearchWrapper static variables; synchronize the method findPhraseNumberFromWord() for search and getPhraseData() for retrieve.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • greenstone3/trunk/src/java/org/greenstone/gsdl3/service/PhindPhraseBrowse.java

    r13270 r14483  
    2222 
    2323import org.greenstone.mgpp.*; 
    24 import org.w3c.dom.Document;  
    25 import org.w3c.dom.Node;  
    26 import org.w3c.dom.Element;  
    27 import org.w3c.dom.Text;  
     24import org.w3c.dom.Document; 
     25import org.w3c.dom.Node; 
     26import org.w3c.dom.Element; 
     27import org.w3c.dom.Text; 
    2828 
    2929import java.util.Vector; 
     
    3333import org.apache.log4j.*; 
    3434 
    35 /**  
     35/** 
    3636 * PhindServices - the phind phrase browsing service 
    37  *  
     37 * 
    3838 * @author <a href="mailto:kjdon@cs.waikato.ac.nz">Katherine Don</a> 
    3939 * @version $Revision$ 
    4040 */ 
    4141public class PhindPhraseBrowse 
    42     extends ServiceRack { 
    43      
    44      static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.PhindPhraseBrowse.class.getName()); 
    45  
    46     // the services on offer 
    47     private static final String PHIND_SERVICE = "PhindApplet"; 
    48  
    49     private MGPPWrapper mgpp_src=null; 
    50     private String basepath = null; 
    51  
    52     private Element applet_description = null; 
    53      
    /**
     * Creates the service with an MGPP wrapper preset for Phind lookups:
     * query level and return level "Document", at most 5 documents,
     * stemming switched off and the case option switched on.
     */
    public PhindPhraseBrowse() {
        MGPPWrapper wrapper = new MGPPWrapper();
        // default query parameters
        wrapper.setQueryLevel("Document");
        wrapper.setReturnLevel("Document");
        wrapper.setMaxDocs(5);
        wrapper.setStem(false);
        wrapper.setCase(true);
        this.mgpp_src = wrapper;
    }
    63  
    64     public void cleanUp() { 
    65     super.cleanUp(); 
    66     this.mgpp_src.unloadIndexData(); 
    67     } 
    68  
    69     /** configure the service module 
    70      * 
    71      * @param info a DOM Element containing any config info for the service 
    72      * @return true if configured 
    73      */ 
    74     public boolean configure(Element info, Element extra_info) { 
    75  
    76     if (!super.configure(info, extra_info)){ 
    77         return false; 
    78     } 
    79  
    80     logger.info("configuring PhindPhraseBrowse"); 
    81  
    82     // set up short_service_info_ - for now just has name and type 
    83     Element e = this.doc.createElement(GSXML.SERVICE_ELEM); 
    84     e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET); 
    85     e.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE); 
    86     this.short_service_info.appendChild(e); 
    87  
    88     // set up the static applet description 
    89  
    90     applet_description = this.doc.createElement(GSXML.SERVICE_ELEM); 
    91     applet_description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET); 
    92     applet_description.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE); 
    93  
    94     // add in the applet info for the phind applet 
    95     // need to make this dynamic - library names etc 
    96     // change the applet params - have a single param with the library name 
    97     // this is left blank at this end, and must be filled in by applet action - if the library name is not needed, this param is left out 
    98     // phindcgi param now is not complete - library must be prepended to it. 
    99     String app_info = "<"+GSXML.APPLET_ELEM+" CODEBASE='applet' CODE='org.greenstone.applet.phind.Phind.class' ARCHIVE='phind.jar, xercesImpl.jar, xml-apis.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library' VALUE=''/> <PARAM NAME='phindcgi' VALUE='?"; 
    100     app_info += GSParams.ACTION +"=a&amp;"+GSParams.REQUEST_TYPE +"=r&amp;"+GSParams.SERVICE+"="+PHIND_SERVICE+"&amp;"+GSParams.OUTPUT+"=xml&amp;"+GSParams.RESPONSE_ONLY+"=1'/>"; 
    101     app_info +="<PARAM NAME='collection'   VALUE='"; 
    102     app_info += this.cluster_name; 
    103     app_info += "'/> <PARAM NAME='classifier' VALUE='1'/>  <PARAM NAME='orientation'  VALUE='vertical'/> <PARAM NAME='depth' VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop' VALUE='interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize' VALUE='10'/> <PARAM NAME='blocksize'    VALUE='10'/>The Phind java applet.</"+GSXML.APPLET_ELEM+">"; 
    104      
    105     Document dom = this.converter.getDOM(app_info); 
    106     if (dom==null) { 
    107         logger.error("Couldn't parse applet info"); 
    108         return false; 
    109     } 
    110     Element app_elem = dom.getDocumentElement(); 
    111     applet_description.appendChild(this.doc.importNode(app_elem, true)); 
    112  
    113     return true; 
    114     } 
    115  
    116     protected Element getServiceDescription(String service, String lang, String subset) { 
    117     if (!service.equals(PHIND_SERVICE)) { 
    118         return null; 
    119     } 
    120     Element describe = (Element) applet_description.cloneNode(true); 
    121     describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME,  getTextString(PHIND_SERVICE+".name", lang))); 
    122     describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION,  getTextString(PHIND_SERVICE+".description", lang))); 
    123     return describe; 
    124     } 
    125  
    126     protected Element processPhindApplet(Element request) { 
    127      
    128     Element param_elem = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    129     HashMap params = GSXML.extractParams(param_elem, false); 
    130      
    131     long first_e = Long.parseLong((String)params.get("pfe")); 
    132     long last_e = Long.parseLong((String)params.get("ple")); 
    133     long first_l = Long.parseLong((String)params.get("pfl")); 
    134     long last_l = Long.parseLong((String)params.get("pll")); 
    135     long first_d = Long.parseLong((String)params.get("pfd")); 
    136     long last_d = Long.parseLong((String)params.get("pld")); 
    137      
    138     long phrase; 
    139     String phrase_str = (String)params.get("ppnum"); 
    140     if (phrase_str == null || phrase_str.equals("")) { 
    141         phrase=0; 
    142     } else { 
    143         phrase = Long.parseLong(phrase_str); 
    144     } 
    145     String word = (String)params.get("pptext"); 
    146     String phind_index = (String)params.get("pc"); 
    147     // the location of the mgpp database files 
    148     this.basepath = GSFile.phindBaseDir(this.site_home, this.cluster_name, phind_index); 
    149  
    150     // the result element 
    151     Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 
    152     result.setAttribute(GSXML.FROM_ATT, PHIND_SERVICE); 
    153     result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 
    154  
    155     // applet result info must be in appletInfo element 
    156     Element applet_data = this.doc.createElement(GSXML.APPLET_DATA_ELEM); 
    157     result.appendChild(applet_data); 
    158     Element phind_data = this.doc.createElement("phindData"); 
    159     applet_data.appendChild(phind_data); 
    160  
    161  
    162     // if we dont know the phrase number, look it up 
    163     if (phrase == 0) { 
    164         if (word==null || word.equals("")) { 
    165         Element error = phindError("no word or phrase"); 
    166         phind_data.appendChild(error); 
    167         return result; 
    168         } 
    169         phrase = findPhraseNumberFromWord( word); 
    170     } 
    171     if (phrase==0) { 
    172         // the word is not in the collection 
    173         // return a phind error string 
    174         Element error = phindError("the term "+word+" is not in the collection"); 
    175         phind_data.appendChild(error); 
    176         return result; 
    177     } 
    178      
    179     // get the phrase data into the phind_data node 
    180     getPhraseData(phind_data, phrase, first_l, last_l, 
    181               first_e, last_e,  first_d, last_d); 
    182     return result; 
    183      
    184      
    185     }// processPhindApplet 
    186      
    187     protected long findPhraseNumberFromWord(String word) { 
    188  
    189     // set the mgpp index data - we are looking up pword 
    190     this.mgpp_src.loadIndexData(this.basepath+File.separatorChar+"pword"); 
    191  
    192     this.mgpp_src.runQuery(word); 
    193  
    194     MGPPQueryResult res = this.mgpp_src.getQueryResult(); 
    195     Vector docs = res.getDocs(); 
    196     if (docs.size()==0) { 
    197         // phrase not found 
    198         return 0; 
    199     } 
    200     MGPPDocInfo doc = (MGPPDocInfo)docs.firstElement(); 
    201     return doc.num_; 
    202     } 
    203  
    204     protected boolean getPhraseData(Element phind_data, 
    205                     long phrase, long first_l, long last_l, 
    206                     long first_e, long last_e, long first_d,  
    207                     long last_d) { 
    208  
    209     String record = this.mgpp_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
    210                           phrase); 
    211     if (record.equals("")) { 
    212         Element error = phindError("somethings gone wrong - we haven't got a record for phrase number "+phrase); 
    213         phind_data.appendChild(error); 
    214         return false; 
    215     } 
    216      
    217     // parse the record - its in gordons cryptic form 
    218     // ":word:tf:ef:df:el:dl:lf:ll" 
    219     // el: e,e,e 
    220     // dl: d;f,d;f, 
    221     // lf and ll may be null 
    222     // l: type,dest, dest; type,dest,dest 
    223  
    224     // ignore everything up to and including first colon (has 
    225     // <Document>3505: at the start) 
    226     record = record.substring(record.indexOf(':')+1); 
    227  
    228     // split on ':' 
    229     String [] fields = record.split(":"); 
    230     String word = fields[0]; 
    231     String tf = fields[1]; 
    232     String ef = fields[2]; 
    233     String df = fields[3]; 
    234  
    235      
    236     String expansions = fields[4]; 
    237     String documents = fields[5]; 
    238     String lf = "0"; 
    239     String linklist = ""; 
    240     if (fields.length > 7) {// have thesaurus stuff 
    241         lf =fields[6]; 
    242         linklist = fields[7]; 
    243     } 
    244      
    245     // the phindData attributes and phrase 
    246     phind_data.setAttribute("id", Long.toString(phrase)); 
    247     phind_data.setAttribute("df", df); 
    248     phind_data.setAttribute("ef", ef); 
    249     phind_data.setAttribute("lf", lf); 
    250     phind_data.setAttribute("tf", tf); 
    251     GSXML.createTextElement(this.doc, "phrase", word); 
    252  
    253     addExpansionList(phind_data, expansions, word, ef, first_e, last_e); 
    254     addDocumentList(phind_data, documents, word, df, first_d, last_d); 
    255     if (!lf.equals("0")) { 
    256         addThesaurusList(phind_data, linklist, word, lf, first_l, last_l); 
    257     } 
    258     return true; 
    259     } 
    260  
    261     protected boolean addExpansionList( Element phind_data, String record,  
    262                        String word, 
    263                        String freq, 
    264                        long first, long last) { 
    265  
    266     Element expansion_list = this.doc.createElement("expansionList"); 
    267     phind_data.appendChild(expansion_list); 
    268     expansion_list.setAttribute("length", freq); 
    269     expansion_list.setAttribute("start", Long.toString(first)); 
    270     expansion_list.setAttribute("end", Long.toString(last)); 
    271  
    272     // get the list of strings 
    273     String [] expansions = record.split(","); 
    274     int length = expansions.length; 
    275     if (length < last) last = length; 
    276     for (long i = first; i < last; i++) { 
    277         long num  = Long.parseLong(expansions[(int)i]); 
    278         Element expansion = getExpansion( num, word); 
    279         expansion.setAttribute("num", Long.toString(i)); 
    280         expansion_list.appendChild(expansion); 
    281     } 
    282     return true; 
    283     } 
    284      
    285     protected Element getExpansion(long phrase_num,  
    286                    String orig_phrase) { 
    287      
    288     // look up the phrase in the pdata thingy 
    289     String record = this.mgpp_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
    290                           phrase_num); 
    291  
    292     if (record ==null || record.equals("")) return null; 
    293  
    294     // ignore everything up to and including first colon  
    295     record = record.substring(record.indexOf(':')+1); 
    296  
    297     String [] fields = record.split(":"); 
    298     String phrase = fields[0]; 
    299     String tf = fields[1]; 
    300     //String ef = fields[2]; dont use this 
    301     String df = fields[3]; 
    302  
    303     Element expansion = this.doc.createElement("expansion"); 
    304     expansion.setAttribute("tf", tf); 
    305     expansion.setAttribute("df", df); 
    306     expansion.setAttribute("id", Long.toString(phrase_num)); 
    307  
    308     // get teh suffix and prefix 
    309     String [] ends = splitPhraseOnWord(phrase, orig_phrase); 
    310     if (!ends[0].equals("")) { 
    311         expansion.appendChild(GSXML.createTextElement(this.doc, "prefix", ends[0])); 
    312     } 
    313     if (!ends[1].equals("")) { 
    314         expansion.appendChild(GSXML.createTextElement(this.doc, "suffix", ends[1])); 
    315     } 
    316  
    317     return expansion; 
    318  
    319     } 
    320  
    321     protected boolean addDocumentList(Element phind_data, String record,  
    322                       String word, 
    323                       String freq, 
    324                       long first, long last) { 
    325  
    326     Element document_list = this.doc.createElement("documentList"); 
    327     phind_data.appendChild(document_list); 
    328     document_list.setAttribute("length", freq); 
    329     document_list.setAttribute("start", Long.toString(first)); 
    330     document_list.setAttribute("end", Long.toString(last)); 
    331  
    332     // get the list of doc,freq 
    333     String [] doc_freqs = record.split(";"); 
    334     int length = doc_freqs.length; 
    335     if (length<last) last=length; 
    336  
    337     for (long i = first; i < last; i++) { 
    338         String doc_elem = doc_freqs[(int)i]; 
    339         int p = doc_elem.indexOf(','); 
    340         long doc_num; 
    341         String doc_freq; 
    342         if (p == -1) { // there is no freq in the record 
    343         doc_num =Long.parseLong(doc_elem); 
    344         doc_freq = "1"; 
    345         } else { 
    346         doc_num = Long.parseLong(doc_elem.substring(0,p)); 
    347         doc_freq = doc_elem.substring(p+1); 
    348         } 
    349         Element document = getDocument( doc_num); 
    350         document.setAttribute("freq", doc_freq); 
    351         document.setAttribute("num", Long.toString(i)); 
    352         document_list.appendChild(document); 
    353     } 
    354  
    355      
    356     return true; 
    357     } 
    358  
    359  
    360     protected Element getDocument(long doc_num) { 
    361      
    362     // look up the phrase in the docs thingy 
    363     String record = this.mgpp_src.getDocument(this.basepath+File.separatorChar+"docs", "Document", 
    364                           doc_num); 
    365      
    366     if (record ==null || record.equals("")) return null; 
    367      
    368     // ignore everything up to and including first \t 
    369     record = record.substring(record.indexOf('\t')+1); 
    370  
    371     String [] fields = record.split("\t"); 
    372     String hash = fields[0]; 
    373     String title = fields[1]; 
    374  
    375     Element d = this.doc.createElement("document"); 
    376     d.setAttribute("hash", hash); 
    377     d.appendChild(GSXML.createTextElement(this.doc, "title", title)); 
    378      
    379     return d; 
    380  
    381     } 
    382     protected boolean addThesaurusList(Element phind_data, String record, 
    383                        String word, 
    384                        String freq, 
    385                        long first, long last) { 
    386  
    387  
    388     Element thesaurus_list = this.doc.createElement("thesaurusList"); 
    389     phind_data.appendChild(thesaurus_list); 
    390     thesaurus_list.setAttribute("length", freq); 
    391     thesaurus_list.setAttribute("start", Long.toString(first)); 
    392     thesaurus_list.setAttribute("end", Long.toString(last)); 
    393      
    394     // get the list of type,dest,dest 
    395     String [] links = record.split(";"); 
    396     int length = links.length; 
    397     long index = 0; 
    398     for (int i = 0; i < length; i++) { // go through the entries 
    399         String link_info = links[(int)i]; 
    400         String [] items = link_info.split(","); 
    401         // the first entry is teh type 
    402         String type = items[0]; 
    403         for (int j = 1; j<items.length; j++, index++) { 
    404         if (index >= first && index < last) { // only output the ones we want 
    405             long phrase = Long.parseLong(items[j]); 
    406             Element t = getThesaurus(phrase); 
    407             t.setAttribute("type", type); 
    408             thesaurus_list.appendChild(t); 
    409         } 
    410         } 
    411     } 
    412  
    413     return true; 
    414     } 
    415  
    416     protected Element getThesaurus(long phrase_num) { 
    417  
    418     // look up the phrase in the pdata thingy 
    419     String record = this.mgpp_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
    420                           phrase_num); 
    421  
    422     if (record ==null || record.equals("")) return null; 
    423  
    424     // ignore everything up to and including first colon  
    425     record = record.substring(record.indexOf(':')+1); 
    426  
    427     String [] fields = record.split(":"); 
    428     String phrase = fields[0]; 
    429     String tf = fields[1]; 
    430     //String ef = fields[2]; dont use this 
    431     String df = fields[3]; 
    432  
    433     Element thesaurus = this.doc.createElement("thesaurus"); 
    434     thesaurus.setAttribute("tf", tf); 
    435     thesaurus.setAttribute("df", df); 
    436     thesaurus.setAttribute("id", Long.toString(phrase_num)); 
    437     thesaurus.appendChild(GSXML.createTextElement(this.doc, "phrase", phrase)); 
    438     return thesaurus; 
    439  
    440     } 
    441  
    442     /** returns an array of two elements - the prefix and the suffix*/ 
    443     protected String [] splitPhraseOnWord(String phrase, String word) { 
    444      
    445     if (word.equals("")) { 
    446          
    447         String [] res =  {phrase, ""}; 
    448         return res; 
    449     } 
    450     // use 2 so that we only split on the first occurrance. trailing empty strings should be included  
    451     String [] result = phrase.split(word, 2); 
    452     return result; 
    453      
    454     } 
    455  
    456     protected Element phindError(String message) { 
    457     Element e = this.doc.createElement("phindError"); 
    458     Text t = this.doc.createTextNode(message); 
    459     e.appendChild(t); 
    460     return e; 
    461     } 
    462      
     42  extends ServiceRack { 
     43   
     44  static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.PhindPhraseBrowse.class.getName()); 
     45   
     46  // the services on offer 
     47  private static final String PHIND_SERVICE = "PhindApplet"; 
     48   
     49  private static MGPPRetrieveWrapper mgpp_retrieve_src=null; 
     50  private static MGPPSearchWrapper mgpp_search_src=null; 
     51  private String basepath = null; 
     52   
     53  private Element applet_description = null; 
     54   
     55  public PhindPhraseBrowse() { 
     56    if(this.mgpp_retrieve_src == null) { 
     57      this.mgpp_retrieve_src = new MGPPRetrieveWrapper(); 
     58    } 
     59    if(this.mgpp_search_src == null) { 
     60      this.mgpp_search_src = new MGPPSearchWrapper(); 
     61    } 
     62    // set up the default params 
     63    this.mgpp_search_src.setQueryLevel("Document"); 
     64    this.mgpp_search_src.setReturnLevel("Document"); 
     65    this.mgpp_search_src.setMaxDocs(5); 
     66    this.mgpp_search_src.setStem(false); 
     67    this.mgpp_search_src.setCase(true); 
     68  } 
     69   
     70  public void cleanUp() { 
     71    super.cleanUp(); 
     72    this.mgpp_retrieve_src.unloadIndexData(); 
     73    this.mgpp_search_src.unloadIndexData(); 
     74  } 
     75   
     76  /** configure the service module 
     77   * 
     78   * @param info a DOM Element containing any config info for the service 
     79   * @return true if configured 
     80   */ 
     81  public boolean configure(Element info, Element extra_info) { 
     82     
     83    if (!super.configure(info, extra_info)){ 
     84      return false; 
     85    } 
     86     
     87    logger.info("configuring PhindPhraseBrowse"); 
     88     
     89    // set up short_service_info_ - for now just has name and type 
     90    Element e = this.doc.createElement(GSXML.SERVICE_ELEM); 
     91    e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET); 
     92    e.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE); 
     93    this.short_service_info.appendChild(e); 
     94     
     95    // set up the static applet description 
     96     
     97    applet_description = this.doc.createElement(GSXML.SERVICE_ELEM); 
     98    applet_description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET); 
     99    applet_description.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE); 
     100     
     101    // add in the applet info for the phind applet 
     102    // need to make this dynamic - library names etc 
     103    // change the applet params - have a single param with the library name 
     104    // this is left blank at this end, and must be filled in by applet action - if the library name is not needed, this param is left out 
     105    // phindcgi param now is not complete - library must be prepended to it. 
     106    String app_info = "<"+GSXML.APPLET_ELEM+" CODEBASE='applet' CODE='org.greenstone.applet.phind.Phind.class' ARCHIVE='phind.jar, xercesImpl.jar, xml-apis.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library' VALUE=''/> <PARAM NAME='phindcgi' VALUE='?"; 
     107    app_info += GSParams.ACTION +"=a&amp;"+GSParams.REQUEST_TYPE +"=r&amp;"+GSParams.SERVICE+"="+PHIND_SERVICE+"&amp;"+GSParams.OUTPUT+"=xml&amp;"+GSParams.RESPONSE_ONLY+"=1'/>"; 
     108    app_info +="<PARAM NAME='collection'   VALUE='"; 
     109    app_info += this.cluster_name; 
     110    app_info += "'/> <PARAM NAME='classifier' VALUE='1'/>  <PARAM NAME='orientation'  VALUE='vertical'/> <PARAM NAME='depth' VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop' VALUE='interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize' VALUE='10'/> <PARAM NAME='blocksize'    VALUE='10'/>The Phind java applet.</"+GSXML.APPLET_ELEM+">"; 
     111     
     112    Document dom = this.converter.getDOM(app_info); 
     113    if (dom==null) { 
     114      logger.error("Couldn't parse applet info"); 
     115      return false; 
     116    } 
     117    Element app_elem = dom.getDocumentElement(); 
     118    applet_description.appendChild(this.doc.importNode(app_elem, true)); 
     119     
     120    return true; 
     121  } 
     122   
     123  protected Element getServiceDescription(String service, String lang, String subset) { 
     124    if (!service.equals(PHIND_SERVICE)) { 
     125      return null; 
     126    } 
     127    Element describe = (Element) applet_description.cloneNode(true); 
     128    describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME,  getTextString(PHIND_SERVICE+".name", lang))); 
     129    describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION,  getTextString(PHIND_SERVICE+".description", lang))); 
     130    return describe; 
     131  } 
     132   
     133  protected Element processPhindApplet(Element request) { 
     134     
     135    Element param_elem = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     136    HashMap params = GSXML.extractParams(param_elem, false); 
     137     
     138    long first_e = Long.parseLong((String)params.get("pfe")); 
     139    long last_e = Long.parseLong((String)params.get("ple")); 
     140    long first_l = Long.parseLong((String)params.get("pfl")); 
     141    long last_l = Long.parseLong((String)params.get("pll")); 
     142    long first_d = Long.parseLong((String)params.get("pfd")); 
     143    long last_d = Long.parseLong((String)params.get("pld")); 
     144     
     145    long phrase; 
     146    String phrase_str = (String)params.get("ppnum"); 
     147    if (phrase_str == null || phrase_str.equals("")) { 
     148      phrase=0; 
     149    } else { 
     150      phrase = Long.parseLong(phrase_str); 
     151    } 
     152    String word = (String)params.get("pptext"); 
     153    String phind_index = (String)params.get("pc"); 
     154    // the location of the mgpp database files 
     155    this.basepath = GSFile.phindBaseDir(this.site_home, this.cluster_name, phind_index); 
     156     
     157    // the result element 
     158    Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 
     159    result.setAttribute(GSXML.FROM_ATT, PHIND_SERVICE); 
     160    result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 
     161     
     162    // applet result info must be in appletInfo element 
     163    Element applet_data = this.doc.createElement(GSXML.APPLET_DATA_ELEM); 
     164    result.appendChild(applet_data); 
     165    Element phind_data = this.doc.createElement("phindData"); 
     166    applet_data.appendChild(phind_data); 
     167     
     168     
     169    // if we dont know the phrase number, look it up 
     170    if (phrase == 0) { 
     171      if (word==null || word.equals("")) { 
     172        Element error = phindError("no word or phrase"); 
     173        phind_data.appendChild(error); 
     174        return result; 
     175      } 
     176      phrase = findPhraseNumberFromWord( word); 
     177    } 
     178    if (phrase==0) { 
     179      // the word is not in the collection 
     180      // return a phind error string 
     181      Element error = phindError("the term "+word+" is not in the collection"); 
     182      phind_data.appendChild(error); 
     183      return result; 
     184    } 
     185     
     186    // get the phrase data into the phind_data node 
     187    getPhraseData(phind_data, phrase, first_l, last_l, 
     188      first_e, last_e,  first_d, last_d); 
     189    return result; 
     190     
     191     
     192  }// processPhindApplet 
     193   
     194  protected long findPhraseNumberFromWord(String word) { 
     195    synchronized (mgpp_search_src) { 
     196        // set the mgpp index data - we are looking up pword 
     197        mgpp_search_src.loadIndexData(this.basepath+File.separatorChar+"pword"); 
     198         
     199        mgpp_search_src.runQuery(word); 
     200         
     201        MGPPQueryResult res = mgpp_search_src.getQueryResult(); 
     202        Vector docs = res.getDocs(); 
     203        if (docs.size()==0) { 
     204            // phrase not found 
     205            return 0; 
     206        } 
     207        MGPPDocInfo doc = (MGPPDocInfo)docs.firstElement(); 
     208        return doc.num_; 
     209    } 
     210  } 
     211   
     212  protected boolean getPhraseData(Element phind_data, 
     213    long phrase, long first_l, long last_l, 
     214    long first_e, long last_e, long first_d, 
     215    long last_d) { 
     216     
     217      synchronized (mgpp_retrieve_src) { 
     218    String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
     219      phrase); 
     220    if (record.equals("")) { 
     221      Element error = phindError("somethings gone wrong - we haven't got a record for phrase number "+phrase); 
     222      phind_data.appendChild(error); 
     223      return false; 
     224    } 
     225     
     226    // parse the record - its in gordons cryptic form 
     227    // ":word:tf:ef:df:el:dl:lf:ll" 
     228    // el: e,e,e 
     229    // dl: d;f,d;f, 
     230    // lf and ll may be null 
     231    // l: type,dest, dest; type,dest,dest 
     232     
     233    // ignore everything up to and including first colon (has 
     234    // <Document>3505: at the start) 
     235    record = record.substring(record.indexOf(':')+1); 
     236     
     237    // split on ':' 
     238    String [] fields = record.split(":"); 
     239    String word = fields[0]; 
     240    String tf = fields[1]; 
     241    String ef = fields[2]; 
     242    String df = fields[3]; 
     243     
     244     
     245    String expansions = fields[4]; 
     246    String documents = fields[5]; 
     247    String lf = "0"; 
     248    String linklist = ""; 
     249    if (fields.length > 7) {// have thesaurus stuff 
     250      lf =fields[6]; 
     251      linklist = fields[7]; 
     252    } 
     253     
     254    // the phindData attributes and phrase 
     255    phind_data.setAttribute("id", Long.toString(phrase)); 
     256    phind_data.setAttribute("df", df); 
     257    phind_data.setAttribute("ef", ef); 
     258    phind_data.setAttribute("lf", lf); 
     259    phind_data.setAttribute("tf", tf); 
     260    GSXML.createTextElement(this.doc, "phrase", word); 
     261     
     262    addExpansionList(phind_data, expansions, word, ef, first_e, last_e); 
     263    addDocumentList(phind_data, documents, word, df, first_d, last_d); 
     264    if (!lf.equals("0")) { 
     265      addThesaurusList(phind_data, linklist, word, lf, first_l, last_l); 
     266    } 
     267    return true; 
     268      } 
     269  } 
     270   
     271  protected boolean addExpansionList( Element phind_data, String record, 
     272    String word, 
     273    String freq, 
     274    long first, long last) { 
     275     
     276    Element expansion_list = this.doc.createElement("expansionList"); 
     277    phind_data.appendChild(expansion_list); 
     278    expansion_list.setAttribute("length", freq); 
     279    expansion_list.setAttribute("start", Long.toString(first)); 
     280    expansion_list.setAttribute("end", Long.toString(last)); 
     281     
     282    // get the list of strings 
     283    String [] expansions = record.split(","); 
     284    int length = expansions.length; 
     285    if (length < last) last = length; 
     286    for (long i = first; i < last; i++) { 
     287      long num  = Long.parseLong(expansions[(int)i]); 
     288      Element expansion = getExpansion( num, word); 
     289      expansion.setAttribute("num", Long.toString(i)); 
     290      expansion_list.appendChild(expansion); 
     291    } 
     292    return true; 
     293  } 
     294   
     295  protected Element getExpansion(long phrase_num, 
     296    String orig_phrase) { 
     297     
     298    // look up the phrase in the pdata thingy 
     299    String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
     300      phrase_num); 
     301     
     302    if (record ==null || record.equals("")) return null; 
     303     
     304    // ignore everything up to and including first colon 
     305    record = record.substring(record.indexOf(':')+1); 
     306     
     307    String [] fields = record.split(":"); 
     308    String phrase = fields[0]; 
     309    String tf = fields[1]; 
     310    //String ef = fields[2]; dont use this 
     311    String df = fields[3]; 
     312     
     313    Element expansion = this.doc.createElement("expansion"); 
     314    expansion.setAttribute("tf", tf); 
     315    expansion.setAttribute("df", df); 
     316    expansion.setAttribute("id", Long.toString(phrase_num)); 
     317     
     318    // get teh suffix and prefix 
     319    String [] ends = splitPhraseOnWord(phrase, orig_phrase); 
     320    if (!ends[0].equals("")) { 
     321      expansion.appendChild(GSXML.createTextElement(this.doc, "prefix", ends[0])); 
     322    } 
     323    if (!ends[1].equals("")) { 
     324      expansion.appendChild(GSXML.createTextElement(this.doc, "suffix", ends[1])); 
     325    } 
     326     
     327    return expansion; 
     328     
     329  } 
     330   
     331  protected boolean addDocumentList(Element phind_data, String record, 
     332    String word, 
     333    String freq, 
     334    long first, long last) { 
     335     
     336    Element document_list = this.doc.createElement("documentList"); 
     337    phind_data.appendChild(document_list); 
     338    document_list.setAttribute("length", freq); 
     339    document_list.setAttribute("start", Long.toString(first)); 
     340    document_list.setAttribute("end", Long.toString(last)); 
     341     
     342    // get the list of doc,freq 
     343    String [] doc_freqs = record.split(";"); 
     344    int length = doc_freqs.length; 
     345    if (length<last) last=length; 
     346     
     347    for (long i = first; i < last; i++) { 
     348      String doc_elem = doc_freqs[(int)i]; 
     349      int p = doc_elem.indexOf(','); 
     350      long doc_num; 
     351      String doc_freq; 
     352      if (p == -1) { // there is no freq in the record 
     353        doc_num =Long.parseLong(doc_elem); 
     354        doc_freq = "1"; 
     355      } else { 
     356        doc_num = Long.parseLong(doc_elem.substring(0,p)); 
     357        doc_freq = doc_elem.substring(p+1); 
     358      } 
     359      Element document = getDocument( doc_num); 
     360      document.setAttribute("freq", doc_freq); 
     361      document.setAttribute("num", Long.toString(i)); 
     362      document_list.appendChild(document); 
     363    } 
     364     
     365     
     366    return true; 
     367  } 
     368   
     369   
     370  protected Element getDocument(long doc_num) { 
     371     
     372    // look up the phrase in the docs thingy 
     373    String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"docs", "Document", 
     374      doc_num); 
     375     
     376    if (record ==null || record.equals("")) return null; 
     377     
     378    // ignore everything up to and including first \t 
     379    record = record.substring(record.indexOf('\t')+1); 
     380     
     381    String [] fields = record.split("\t"); 
     382    String hash = fields[0]; 
     383    String title = fields[1]; 
     384     
     385    Element d = this.doc.createElement("document"); 
     386    d.setAttribute("hash", hash); 
     387    d.appendChild(GSXML.createTextElement(this.doc, "title", title)); 
     388     
     389    return d; 
     390     
     391  } 
     392  protected boolean addThesaurusList(Element phind_data, String record, 
     393    String word, 
     394    String freq, 
     395    long first, long last) { 
     396     
     397     
     398    Element thesaurus_list = this.doc.createElement("thesaurusList"); 
     399    phind_data.appendChild(thesaurus_list); 
     400    thesaurus_list.setAttribute("length", freq); 
     401    thesaurus_list.setAttribute("start", Long.toString(first)); 
     402    thesaurus_list.setAttribute("end", Long.toString(last)); 
     403     
     404    // get the list of type,dest,dest 
     405    String [] links = record.split(";"); 
     406    int length = links.length; 
     407    long index = 0; 
     408    for (int i = 0; i < length; i++) { // go through the entries 
     409      String link_info = links[(int)i]; 
     410      String [] items = link_info.split(","); 
     411      // the first entry is teh type 
     412      String type = items[0]; 
     413      for (int j = 1; j<items.length; j++, index++) { 
     414        if (index >= first && index < last) { // only output the ones we want 
     415          long phrase = Long.parseLong(items[j]); 
     416          Element t = getThesaurus(phrase); 
     417          t.setAttribute("type", type); 
     418          thesaurus_list.appendChild(t); 
     419        } 
     420      } 
     421    } 
     422     
     423    return true; 
     424  } 
     425   
     426  protected Element getThesaurus(long phrase_num) { 
     427     
     428    // look up the phrase in the pdata thingy 
     429    String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document", 
     430      phrase_num); 
     431     
     432    if (record ==null || record.equals("")) return null; 
     433     
     434    // ignore everything up to and including first colon 
     435    record = record.substring(record.indexOf(':')+1); 
     436     
     437    String [] fields = record.split(":"); 
     438    String phrase = fields[0]; 
     439    String tf = fields[1]; 
     440    //String ef = fields[2]; dont use this 
     441    String df = fields[3]; 
     442     
     443    Element thesaurus = this.doc.createElement("thesaurus"); 
     444    thesaurus.setAttribute("tf", tf); 
     445    thesaurus.setAttribute("df", df); 
     446    thesaurus.setAttribute("id", Long.toString(phrase_num)); 
     447    thesaurus.appendChild(GSXML.createTextElement(this.doc, "phrase", phrase)); 
     448    return thesaurus; 
     449     
     450  } 
     451   
     452  /** returns an array of two elements - the prefix and the suffix*/ 
     453  protected String [] splitPhraseOnWord(String phrase, String word) { 
     454     
     455    if (word.equals("")) { 
     456       
     457      String [] res =  {phrase, ""}; 
     458      return res; 
     459    } 
     460    // use 2 so that we only split on the first occurrance. trailing empty strings should be included 
     461    String [] result = phrase.split(word, 2); 
     462    return result; 
     463     
     464  } 
     465   
     466  protected Element phindError(String message) { 
     467    Element e = this.doc.createElement("phindError"); 
     468    Text t = this.doc.createTextNode(message); 
     469    e.appendChild(t); 
     470    return e; 
     471  } 
     472   
    463473} 
    464474