Ignore:
Timestamp:
1999-11-02T10:53:28+13:00 (25 years ago)
Author:
sjboddie
Message:

added cross-collection searching capability - still needs lots of
work but the basic functionality is there

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/recpt/queryaction.cpp

    r649 r757  
    2828/*
    2929   $Log$
     30   Revision 1.27  1999/11/01 21:53:27  sjboddie
     31   added cross-collection searching capability - still needs lots of
     32   work but the basic functionality is there
     33
    3034   Revision 1.26  1999/10/10 08:14:10  sjboddie
    3135   - metadata now returns mp rather than array
     
    127131#include "querytools.h"
    128132#include "formattools.h"
     133#include "cgiutils.h"
     134
     135void colinfo_t::clear () {
     136  formatlistptr = NULL;
     137  browserptr = NULL;
     138}
     139
     140void QueryResult_t::clear() {
     141  doc.clear();
     142  collection.clear();
     143}
    129144
    130145queryaction::queryaction () {
     
    285300  arg_ainfo.savedarginfo = cgiarginfo::must;
    286301  argsinfo.addarginfo (NULL, arg_ainfo);
     302
     303  // "ccs"
     304  arg_ainfo.shortname = "ccs";
     305  arg_ainfo.longname = "cross collection searching";
     306  arg_ainfo.multiplechar = false;
     307  arg_ainfo.defaultstatus = cgiarginfo::weak;
     308  arg_ainfo.argdefault = "0";
     309  arg_ainfo.savedarginfo = cgiarginfo::must;
     310  argsinfo.addarginfo (NULL, arg_ainfo);
     311
     312  // "ccp"
     313  arg_ainfo.shortname = "ccp";
     314  arg_ainfo.longname = "cross collection page";
     315  arg_ainfo.multiplechar = false;
     316  arg_ainfo.defaultstatus = cgiarginfo::weak;
     317  arg_ainfo.argdefault = "0";
     318  arg_ainfo.savedarginfo = cgiarginfo::must;
     319  argsinfo.addarginfo (NULL, arg_ainfo);
     320
     321  // "cc"
     322  arg_ainfo.shortname = "cc";
     323  arg_ainfo.longname = "collections to search";
     324  arg_ainfo.multiplechar = true;
     325  arg_ainfo.defaultstatus = cgiarginfo::weak;
     326  arg_ainfo.argdefault = "";
     327  arg_ainfo.savedarginfo = cgiarginfo::must;
     328  argsinfo.addarginfo (NULL, arg_ainfo);
     329
    287330}
    288331
     
    349392}
    350393
    351 void queryaction::get_cgihead_info (cgiargsclass &/*args*/, response_t &response,
    352                      text_t &response_data, ostream &/*logout*/) {
     394void queryaction::get_cgihead_info (cgiargsclass &/*args*/, recptprotolistclass * /*protos*/,
     395                    response_t &response, text_t &response_data,
     396                    ostream &/*logout*/) {
    353397  response = content;
    354398  response_data = "text/html";
    355399}
    356400
    357 void queryaction::define_internal_macros (const ColInfoResponse_t &/*collectinfo*/, displayclass &disp,
    358                       cgiargsclass &args, recptproto * /*collectproto*/,
     401void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args,
     402                      recptprotolistclass * /*protos*/,
    359403                      ostream &/*logout*/) {
    360404
     
    398442  if (args.getintarg("s")) quotedquery += "_textstemon_";
    399443  disp.setmacro ("quotedquery", "query", quotedquery);
    400 
    401   // we'll also set num_phrases here so we don't have to parse the
    402   // querystring again later (we need to know this before outputting
    403   // results so we don't include results for documents not containing
    404   // all requested phrases).
    405   num_phrases = phrases.size();
    406 
    407444}
    408445
     
    436473}
    437474
    438 void queryaction::define_external_macros (const ColInfoResponse_t &/*collectinfo*/, displayclass &disp,
    439                       cgiargsclass &args, recptproto *collectproto,
    440                       ostream &logout) {
     475void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args,
     476                      recptprotolistclass *protos, ostream &logout) {
    441477
    442478  // define_external_macros sets the following macros:
     
    450486 
    451487  // can't do anything if collectproto is null (i.e. no collection was specified)
     488  recptproto *collectproto = protos->getrecptproto (args["c"], logout);
    452489  if (collectproto == NULL) return;
    453490
     
    484521}
    485522
    486 bool queryaction::do_action (cgiargsclass &args, const ColInfoResponse_t &collectinfo,
    487                  recptproto *collectproto, displayclass &disp,
     523void queryaction::output_ccp (cgiargsclass &args, recptprotolistclass *protos,
     524                  displayclass &disp, outconvertclass &outconvert,
     525                  ostream &textout, ostream &logout) {
     526
     527  ColInfoResponse_t cinfo;
     528  comerror_t err;
     529  InfoFilterOptionsResponse_t fresponse;
     530  InfoFilterOptionsRequest_t frequest;
     531  frequest.filterName = "QueryFilter";
     532
     533  text_t &index = args["h"];
     534  text_t &subcollection = args["j"];
     535  text_t &language = args["n"];
     536
     537  textout << outconvert << disp << "_query:header_\n"
     538      << "<center>_navigationbar_</center><br>\n"
     539      << "<form name=QueryForm method=get action=\"_gwcgi_\">\n"
     540      << "<input type=hidden name=a value=\"q\">\n"
     541      << "<input type=hidden name=e value=\"_compressedoptions_\">\n"
     542      << "<input type=hidden name=ccp value=\"1\">\n"
     543      << "<center><table width=_pagewidth_><tr valign=top>\n"
     544      << "<td>Select collections to search for \"" << args["q"]
     545      << "\" <i>(index=" << index << " subcollection=" << subcollection
     546      << " language=" << language << ")</i></td>\n"
     547      << "<td><input type=\"submit\" value=\"_query:textbeginsearch_\"></td>\n"
     548      << "</tr></table></center>\n"
     549      << "<center><table width=_pagewidth_>\n";
     550
     551 
     552  recptprotolistclass::iterator rprotolist_here = protos->begin();
     553  recptprotolistclass::iterator rprotolist_end = protos->end();
     554  while (rprotolist_here != rprotolist_end) {
     555    if ((*rprotolist_here).p != NULL) {
     556     
     557      text_tarray collist;
     558      (*rprotolist_here).p->get_collection_list (collist, err, logout);
     559      if (err == noError) {
     560    text_tarray::iterator collist_here = collist.begin();
     561    text_tarray::iterator collist_end = collist.end();
     562    while (collist_here != collist_end) {
     563     
     564      (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout);
     565      //      if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) {
     566      if (err == noError && (cinfo.buildDate > 0)) {
     567       
     568        (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout);
     569        if (err == noError) {
     570         
     571          FilterOption_tmap::const_iterator it;
     572          FilterOption_tmap::const_iterator end = fresponse.filterOptions.end();
     573          if (!index.empty()) {
     574        it = fresponse.filterOptions.find ("Index");
     575        if (it == end) {collist_here ++; continue;}
     576        text_tarray::const_iterator there = (*it).second.validValues.begin();
     577        text_tarray::const_iterator tend = (*it).second.validValues.end();
     578        while (there != tend) {
     579          if (*there == index) break;
     580          there ++;
     581        }
     582        if (there == tend) {collist_here++; continue;}
     583          }
     584          if (!subcollection.empty()) {
     585        it = fresponse.filterOptions.find ("Subcollection");
     586        if (it == end) {collist_here++; continue;}
     587        text_tarray::const_iterator there = (*it).second.validValues.begin();
     588        text_tarray::const_iterator tend = (*it).second.validValues.end();
     589        while (there != tend) {
     590          if (*there == subcollection) break;
     591          there ++;
     592        }
     593        if (there == tend) {collist_here++; continue;}
     594          }
     595          if (!language.empty()) {
     596        it = fresponse.filterOptions.find ("Language");
     597        if (it == end) {collist_here++; continue;}
     598        text_tarray::const_iterator there = (*it).second.validValues.begin();
     599        text_tarray::const_iterator tend = (*it).second.validValues.end();
     600        while (there != tend) {
     601          if (*there == language) break;
     602          there ++;
     603        }
     604        if (there == tend) {collist_here++; continue;}
     605          }
     606       
     607          // we've got a matching collection
     608          textout << outconvert
     609              << "<tr><td><input type=checkbox checked name=cc value=\""
     610              << *collist_here << "\"></td><td>\n";
     611         
     612          if (!cinfo.collectionmeta["collectionname"].empty())
     613        textout << outconvert << disp << cinfo.collectionmeta["collectionname"];
     614          else
     615        textout << outconvert << *collist_here;
     616         
     617          textout << "\n</td><td>";
     618         
     619        }
     620       
     621        textout << "\n</td></tr>\n";
     622      }
     623      collist_here ++;
     624    }
     625      }
     626    }
     627    rprotolist_here ++;
     628  }
     629  textout << outconvert << disp
     630      << "</table></center>\n"
     631      << "</form>\n"
     632      << "_query:footer_\n";
     633 
     634}
     635
     636bool queryaction::do_action (cgiargsclass &args, recptprotolistclass *protos,
     637                 browsermapclass *browsers, displayclass &disp,
    488638                 outconvertclass &outconvert, ostream &textout,
    489639                 ostream &logout) {
    490 
    491   if (formatstring.empty()) {
    492     text_tmap::const_iterator result = collectinfo.format.find("QueryResults");
    493     if (result != collectinfo.format.end())
    494       formatstring = (*result).second;
    495   }
    496 
    497   // see if there's a QueryLinks format option
    498   text_t querylinkmeta;
    499   bool havequerylink = false;
    500   text_tmap::const_iterator it = collectinfo.format.find("QueryLinks");
    501   if (it != collectinfo.format.end()) {
    502     querylinkmeta = (*it).second;
    503     havequerylink = true;
    504   }
    505 
    506   // if we still don't have a format string use the default
    507   if (formatstring.empty())
    508     formatstring = "<td valign=top nowrap>[link]_icontext_[/link]</td><td>[Title]</td>";
    509 
     640 
     641  if (args["ccs"] == 1) {
     642    // cross collection searching
     643    if (args["ccp"] != 1) {
     644      // display the cross collection search page
     645      output_ccp (args, protos, disp, outconvert, textout, logout);
     646    } else {
     647      // query the selected collections
     648      if (!search_multiple_collections (args, protos, browsers, disp, outconvert,
     649                    textout, logout)) return false;
     650    }
     651  } else {
     652    // simply query the current collection
     653    if (!search_single_collection (args, protos, browsers, disp, outconvert,
     654                   textout, logout)) return false;
     655  }
     656
     657  return true;
     658}
     659
     660bool queryaction::search_multiple_collections (cgiargsclass &args, recptprotolistclass *protos,
     661                           browsermapclass *browsers, displayclass &disp,
     662                           outconvertclass &outconvert, ostream &textout,
     663                           ostream &logout) {
     664
     665  text_tarray collections;
     666 
     667  text_t arg_cc = args["cc"];
     668  decode_cgi_arg (arg_cc);
     669  splitchar (arg_cc.begin(), arg_cc.end(), ',', collections);
     670
     671  if (collections.empty()) {
     672    logout << "queryaction::search_multiple_collections: No collections "
     673       << "set for doing multiple query - will search current collection\n";
     674    textout << outconvert << disp << "_query:textwarningnocollections_\n";
     675    return search_single_collection (args, protos, browsers, disp,
     676                     outconvert, textout, logout);
     677  }
     678
     679  // queryaction uses "VList" browser to display results,
     680  // a queries clasification is "Search"
     681  text_t browsertype = "VList";
     682  text_t classification = "Search";
     683
     684  QueryResult_tset results;
     685  map<text_t, colinfo_t, lttext_t> colinfomap;
     686
     687  ColInfoResponse_t cinfo;
     688  comerror_t err;
     689  FilterRequest_t request;
     690  FilterResponse_t response;
     691  request.filterResultOptions = FROID | FRmetadata | FRtermFreq | FRranking;
     692  text_t formattedstring = args["q"];
     693  text_t freqmsg = "_textfreqmsg1_";
     694  int numdocs = 0;
     695  format_querystring (formattedstring, args.getintarg("b"));
     696  set_queryfilter_options (request, formattedstring, args);
     697
     698  // need to retrieve maxdocs matches for each collection
     699  // (will eventually want to tidy this up, do so caching etc.)
     700  OptionValue_t option;
     701  option.name = "StartResults";
     702  option.value = "1";
     703  request.filterOptions.push_back (option);
     704 
     705  option.name = "EndResults";
     706  option.value = args["m"];
     707  request.filterOptions.push_back (option);
     708
     709  text_tarray::iterator col_here = collections.begin();
     710  text_tarray::iterator col_end = collections.end();
     711
     712  while (col_here != col_end) {
     713
     714    request.fields.erase (request.fields.begin(), request.fields.end());
     715    request.getParents = false;
     716
     717    recptproto *collectproto = protos->getrecptproto (*col_here, logout);
     718    if (collectproto == NULL) {
     719      logout << outconvert << "queryaction::search_multiple_collections: " << *col_here
     720         << " collection has a NULL collectproto, ignoring\n";
     721      col_here ++;
     722      continue;
     723    }
     724    collectproto->get_collectinfo (*col_here, cinfo, err, logout);
     725   
     726    browserclass *bptr = browsers->getbrowser (browsertype);
     727
     728    // get the formatstring if there is one
     729    text_t formatstring;
     730    if (!get_formatstring (classification, browsertype,
     731               cinfo.format, formatstring))
     732      formatstring = bptr->get_default_formatstring();
     733
     734    bptr->load_metadata_defaults (request.fields);
     735
     736    format_t *formatlistptr = new format_t();
     737    parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
     738
     739    colinfo_t thiscolinfo;
     740    thiscolinfo.formatlistptr = formatlistptr;
     741    thiscolinfo.browserptr = bptr;
     742    colinfomap[*col_here] = thiscolinfo;
     743   
     744    // do the query
     745    collectproto->filter (*col_here, request, response, err, logout);
     746    if (err != noError) {
     747      outconvertclass text_t2ascii;
     748      logout << text_t2ascii
     749         << "queryaction::search_multiple_collections: call to QueryFilter failed "
     750         << "for " << *col_here << " collection (" << get_comerror_string (err) << ")\n";
     751      return false;
     752    }
     753
     754    TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
     755    TermInfo_tarray::const_iterator end_term = response.termInfo.end();
     756    freqmsg += "<br><b>" + *col_here + "</b>: ";
     757    while (this_term != end_term) {
     758      freqmsg += (*this_term).term + ": " + (*this_term).freq;
     759      if ((this_term + 1) != end_term)
     760    freqmsg += ", ";
     761      this_term ++;
     762    }
     763   
     764    if (response.numDocs > 0) {
     765      numdocs += response.numDocs;
     766
     767      QueryResult_t thisresult;
     768      thisresult.collection = *col_here;
     769      ResultDocInfo_tarray::iterator doc_here = response.docInfo.begin();
     770      ResultDocInfo_tarray::iterator doc_end = response.docInfo.end();
     771      while (doc_here != doc_end) {
     772    thisresult.doc = *doc_here;
     773    results.insert (thisresult);
     774    doc_here ++;
     775      }
     776    }
     777    col_here ++;
     778  }
     779
     780  if (numdocs > 0) disp.setmacro ("freqmsg", "query", freqmsg);
     781  else disp.setmacro("resultline", "query", "_textnodocs_");
     782
     783
     784  QueryResult_tset::iterator res_here = results.begin();
     785  QueryResult_tset::iterator res_end = results.end();
     786  text_tset metadata; // empty !!
     787  bool getParents = false; // don't care !!
     788  recptproto *collectproto = NULL;
     789  bool use_table;
     790  ResultDocInfo_t thisdoc;
     791  format_t *formatlistptr = NULL;
     792  browserclass *browserptr = NULL;
     793
     794  int maxdocs = args.getintarg("m");
     795  int firstdoc = args.getintarg("r");
     796  int hitsperpage = args.getintarg("o");
     797  if (numdocs > maxdocs) numdocs = maxdocs;
     798  if (hitsperpage == -1) hitsperpage = numdocs;
     799
     800  // set up _thisfirst_ and _thislast_ macros
     801  disp.setmacro ("thisfirst", "query", firstdoc);
     802  int thislast = firstdoc + (hitsperpage - 1);
     803  if (thislast > numdocs) thislast = numdocs;
     804  disp.setmacro ("thislast", "query", thislast);
     805
     806  // set up _prevfirst_ and _prevlast_ macros
     807  if (firstdoc > 1) {
     808    disp.setmacro ("prevlast", "query", firstdoc - 1);
     809    int prevfirst = firstdoc - hitsperpage;
     810    if (prevfirst < 1) prevfirst = 1;
     811    disp.setmacro ("prevfirst", "query", prevfirst);
     812  }
     813
     814  // set up _nextfirst_ and _nextlast_ macros
     815  if (thislast < numdocs) {
     816    disp.setmacro ("nextfirst", "query", thislast + 1);
     817    int nextlast = thislast + hitsperpage;
     818    if (nextlast > numdocs) nextlast = numdocs;
     819    disp.setmacro ("nextlast", "query", nextlast);
     820  }
     821
     822  textout << outconvert << disp << "_query:header_\n"
     823      << "_query:content_";
     824
     825  int count = 1;
     826
     827  // output results
     828  while (res_here != res_end) {
     829    if (count < firstdoc) {count ++; res_here ++; continue;}
     830    if (count > thislast) break;
     831    formatlistptr = colinfomap[(*res_here).collection].formatlistptr;
     832    browserptr = colinfomap[(*res_here).collection].browserptr;
     833    thisdoc = (*res_here).doc;
     834    use_table = is_table_content (formatlistptr);
     835    browserptr->output_section_group (thisdoc, args, (*res_here).collection, 0,
     836                          formatlistptr, use_table, metadata, getParents,
     837                          collectproto, disp, outconvert, textout, logout);
     838    //    textout << outconvert << "(ranking: " << (*res_here).doc.ranking << ")\n";
     839    res_here ++;
     840    count ++;
     841  }
     842
     843  textout << outconvert << disp << "_query:footer_";
     844 
     845  // clean up the format_t pointers
     846  map<text_t, colinfo_t, lttext_t>::iterator here =  colinfomap.begin();
     847  map<text_t, colinfo_t, lttext_t>::iterator end =  colinfomap.end();
     848  while (here != end) {
     849    delete ((*here).second.formatlistptr);
     850    here ++;
     851  }
     852  return true;
     853}
     854
     855bool queryaction::search_single_collection (cgiargsclass &args, recptprotolistclass *protos,
     856                        browsermapclass *browsers, displayclass &disp,
     857                        outconvertclass &outconvert, ostream &textout,
     858                        ostream &logout) {
     859
     860  recptproto *collectproto = protos->getrecptproto (args["c"], logout);
    510861  if (collectproto == NULL) {
    511     logout << "queryaction::do_action called with NULL collectproto\n";
    512     textout << outconvert << disp << "_query:header_\n"
    513         << "Error: Attempt to do query without setting collection\n"
    514         << "_query:footer_\n";
    515   } else { 
    516 
    517     FilterRequest_t request;
    518     FilterResponse_t response;
    519     format_t *formatlistptr = new format_t();
    520 
    521     parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
    522 
    523     if (havequerylink)
    524       request.fields.insert (querylinkmeta);
    525 
    526     // do the query
    527     request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
    528     if (!do_query (request, args, collectproto, response, logout))
    529       return false;
     862    logout << outconvert << "queryaction::search_single_collection: " << args["c"]
     863       << " collection has a NULL collectproto\n";
     864    return false;
     865  }
     866
     867  // queryaction uses "VList" browser to display results,
     868  // a query's classification is "Search"
     869  text_t browsertype = "VList";
     870  text_t classification = "Search";
     871
     872  ColInfoResponse_t cinfo;
     873  comerror_t err;
     874  collectproto->get_collectinfo (args["c"], cinfo, err, logout);
    530875   
    531     // set macros
    532     define_query_macros (args, disp, response);
     876  browserclass *bptr = browsers->getbrowser (browsertype);
     877
     878  // get the formatstring if there is one
     879  text_t formatstring;
     880  if (!get_formatstring (classification, browsertype,
     881             cinfo.format, formatstring))
     882    formatstring = bptr->get_default_formatstring();
     883
     884  FilterRequest_t request;
     885  FilterResponse_t response;
     886  bptr->set_filter_options (request, args);
     887  bptr->load_metadata_defaults (request.fields);
     888
     889  format_t *formatlistptr = new format_t();
     890  parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
    533891   
    534     // output the header
    535     textout << outconvert << disp << "_query:header_\n"
    536         << "_query:content_";
    537 
    538     // output the results
    539     textout << "<table cellspacing=4>\n";
    540     ResultDocInfo_tarray::iterator this_doc = response.docInfo.begin();
    541     ResultDocInfo_tarray::iterator end_doc = response.docInfo.end();
    542 
    543     while (this_doc != end_doc) {
    544       // don't include docs that didn't match phrases (if there were any)
    545       // those that did match will have been sorted to the top
    546       if ((*this_doc).num_phrase_match < num_phrases) break;
    547       textout << "<tr>\n";
    548       if (havequerylink) {
    549     const text_t &qlmeta = (*this_doc).metadata[querylinkmeta].values[0];
    550     if (qlmeta.empty())
    551       textout << outconvert << disp
    552           << get_formatted_string (*this_doc, formatlistptr, "", "_iconblanktext_") << "\n";
    553     else
    554       textout << outconvert << disp
    555           << get_formatted_string (*this_doc, formatlistptr) << "\n";
    556       } else {
    557     textout << outconvert << disp
    558         << get_formatted_string (*this_doc, formatlistptr) << "\n";
    559       }
    560       textout << "</tr>\n";
    561 
    562       this_doc ++;
    563     }
    564     textout << "</table>\n";
    565 
    566     delete (formatlistptr);
    567 
    568     // output the footer
    569     textout << outconvert << disp << "_query:footer_";
    570   }
     892  // do the query
     893  request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
     894  text_t formattedstring = args["q"];
     895  format_querystring (formattedstring, args.getintarg("b"));
     896  set_queryfilter_options (request, formattedstring, args);
     897  collectproto->filter (args["c"], request, response, err, logout);
     898  if (err != noError) {
     899    outconvertclass text_t2ascii;
     900    logout << text_t2ascii
     901       << "queryaction::search_single_collections: call to QueryFilter failed "
     902       << "for " << args["c"] << " collection (" << get_comerror_string (err) << ")\n";
     903    return false;
     904  }
     905 
     906  define_query_macros (args, disp, response);
     907
     908  textout << outconvert << disp << "_query:header_\n"
     909      << "_query:content_";
     910
     911  // output the results
     912  bool use_table = is_table_content (formatlistptr);
     913  bptr->output_section_group (response, args, "", 0, formatlistptr,
     914                  use_table, request.fields, request.getParents,
     915                  collectproto, disp, outconvert, textout, logout);
     916 
     917
     918  textout << outconvert << disp << "_query:footer_";
     919 
     920  delete (formatlistptr);
    571921 
    572922  return true;
Note: See TracChangeset for help on using the changeset viewer.