Changeset 12655


Ignore:
Timestamp:
2006-09-04T09:56:41+12:00 (18 years ago)
Author:
mdewsnip
Message:

Lucene now returns just the results we're interested in (i.e. those for a page), rather than all the results, making searching significantly faster in many cases. Many thanks to John Thompson and DL Consulting Ltd.

Location:
trunk/gsdl/src/colservr
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/lucenequeryfilter.cpp

    r12421 r12655  
    176176    vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
    177177
    178     if (endresults == -1) endresults = MAXNUMDOCS;
    179     while (docorder_here != docorder_end) {
    180       if (resultnum > endresults) break;
     178    // Now handled by Lucene directly
     179    //if (endresults == -1) endresults = MAXNUMDOCS;
     180
     181    while (docorder_here != docorder_end)
     182      {
     183        // Now handled by Lucene directly
     184        //if (resultnum > endresults) break;
    181185     
    182       // translate the document number
    183       if (!translate(gdbmptr, *docorder_here, trans_OID)) {
    184     logout << text_t2ascii
    185            << "warning: could not translate lucene document number \""
    186            << *docorder_here << "\" to OID.\n\n";
    187    
    188       } else {
    189     docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
    190 
    191     // see if there is a result for this number,
    192     // if it is in the request set (or the request set is empty)
    193     if (docset_here != queryresults.docs.docset.end() &&
    194         (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
    195       if (resultnum >= startresults) {
    196         // add this document
    197         resultdoc.OID = trans_OID;
    198         resultdoc.result_num = resultnum;
    199         resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
    200 
    201         response.docInfo.push_back (resultdoc);
    202       }
    203      
    204       ++resultnum;
    205     }
    206       } // else
    207      
    208       ++docorder_here;
    209     }
     186        // translate the document number
     187        if (!translate(gdbmptr, *docorder_here, trans_OID))
     188          {
     189            logout << text_t2ascii
     190                   << "warning: could not translate lucene document number \""
     191                   << *docorder_here << "\" to OID.\n\n";
     192           
     193          }
     194        else
     195          {
     196            docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
     197
     198            // see if there is a result for this number,
     199            // if it is in the request set (or the request set is empty)
     200            if (docset_here != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, trans_OID)))
     201              {
     202                // Now handled by Lucene directly
     203                //if (resultnum >= startresults) {
     204
     205                // add this document
     206                resultdoc.OID = trans_OID;
     207                resultdoc.result_num = resultnum;
     208                resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
     209               
     210                response.docInfo.push_back (resultdoc);
     211                //}
     212                ++resultnum;
     213              }
     214          } // else
     215       
     216        ++docorder_here;
     217      }
    210218  } // if need matching docs
    211 
     219 
    212220  // assemble the term results
    213221  if (need_term_info(request.filterResultOptions)) {
  • trunk/gsdl/src/colservr/lucenesearch.cpp

    r12419 r12655  
    142142    }
    143143
     144  // New code to allow Lucene to do paging of search results. This should
     145  // substantially improve performance as we don't have to return all 12000
     146  // hits if we only need the first 20!
     147  if (queryparams.startresults && queryparams.endresults)
     148    {
     149      cerr << "Start Results: " << queryparams.startresults << endl;
     150      cmd += (text_t)" -startresults " + queryparams.startresults;
     151      cerr << "End Results: " << queryparams.endresults << endl;
     152      cmd += (text_t)" -endresults " + queryparams.endresults;
     153    }
     154
    144155  cerr << "Lucene command: " << cmd << endl;
    145156 
  • trunk/gsdl/src/colservr/queryfilter.cpp

    r12410 r12655  
    170170  }
    171171
     172  // Store the start and end results in the query too, as Lucene now needs
     173  // them passed through to the Java side
     174  query.startresults = startresults;
     175  query.endresults = endresults;
     176
    172177  // add the last query
    173178  query_params.push_back (query);
  • trunk/gsdl/src/colservr/queryinfo.cpp

    r12421 r12655  
    5050  sortfield.clear();
    5151  fuzzysearch = 0; // 0 = not fuzzy, 1 = fuzzy
     52  startresults = 1; // all
     53  endresults = 10; // all
    5254}
    5355
     
    7072  sortfield = q.sortfield;
    7173  fuzzysearch = q.fuzzysearch;
     74  startresults = q.startresults;
     75  endresults = q.endresults;
    7276  return *this;
    7377}
     
    9094      (x.filterstring == y.filterstring) &&
    9195          (x.sortfield == y.sortfield) &&
    92           (x.fuzzysearch == y.fuzzysearch));
     96          (x.fuzzysearch == y.fuzzysearch) &&
     97          (x.startresults == y.startresults) &&
     98          (x.startresults == y.startresults));
    9399}
    94100
     
    118124  outs << " sortfield = \"" << q.sortfield << "\"\n";
    119125  outs << " fuzzysearch = \"" << q.fuzzysearch << "\"\n";
     126  outs << " startresults = \"" << q.startresults << "\"\n";
     127  outs << " endresults = \"" << q.endresults << "\"\n";
    120128  outs << "\n";
    121129
  • trunk/gsdl/src/colservr/queryinfo.h

    r12421 r12655  
    6969  text_t sortfield; // Field to use for sorting result set (currently used by lucene)
    7070  int fuzzysearch; // Should search be fuzzy (only used by Lucene)
     71
     72  int startresults;
     73  int endresults;
    7174
    7275  queryparamclass ();
Note: See TracChangeset for help on using the changeset viewer.