Changeset 12770 for trunk/gsdl


Ignore:
Timestamp:
2006-09-18T14:32:31+12:00 (18 years ago)
Author:
mdewsnip
Message:

Changed the Lucene "-fuzzy" argument to "-fuzziness <value>", for more accurate control.

Location:
trunk/gsdl
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/lucene_query.pl

    r12656 r12770  
    4444{
    4545    my $full_indexdir = shift(@_);
    46     my $fuzzy = shift(@_);
     46    my $fuzziness = shift(@_);
    4747    my $filter_string = shift(@_);
    4848    my $sort_field = shift(@_);
     
    5656
    5757    my $cmd = "| " . $java_lucene . " \"" . $full_indexdir . "\"";
    58     if (defined($fuzzy)) {
    59         $cmd .= " -fuzzy";
     58    if (defined($fuzziness)) {
     59        $cmd .= " -fuzziness " . $fuzziness;
    6060    }
    6161    if (defined($filter_string)) {
     
    9494    my $argc = scalar(@argv);
    9595    if ($argc == 0) {
    96     print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [-out out_file]\n";
     96    print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [-out out_file]\n";
    9797    exit 1;
    9898    }
     
    100100    my $full_indexdir = shift(@argv);
    101101    my $query = undef;
    102     my $fuzzy = undef;
     102    my $fuzziness = undef;
    103103    my $filter_string = undef;
    104104    my $sort_field = undef;
     
    109109    for (my $i = 0; $i < scalar(@argv); $i++)
    110110    {
    111     if ($argv[$i] eq "-fuzzy") {
    112         $fuzzy = 1;
     111    if ($argv[$i] eq "-fuzziness") {
     112        $i++;
     113        $fuzziness = $argv[$i];
    113114    }
    114115        elsif ($argv[$i] eq "-filter") {
     
    141142    }
    142143
    143     open_java_lucene($full_indexdir, $fuzzy, $filter_string, $sort_field, $dco, $start_results, $end_results, $out_file);
     144    open_java_lucene($full_indexdir, $fuzziness, $filter_string, $sort_field, $dco, $start_results, $end_results, $out_file);
    144145
    145146    if (defined $query) {
  • trunk/gsdl/src/colservr/lucenesearch.cpp

    r12685 r12770  
    123123    cmd += " -sort \"" + queryparams.sortfield + "\"";
    124124  }
    125   if (queryparams.fuzzysearch)
    126     {
    127       cmd += " -fuzzy";
    128     }
     125  if (!queryparams.fuzziness.empty()) {
     126    cmd += " -fuzziness " + queryparams.fuzziness;
     127  }
    129128
    130129  // New code to support configuration of the default conjunction operator
  • trunk/gsdl/src/colservr/queryfilter.cpp

    r12655 r12770  
    9292  query.filterstring = filterOptions["FilterString"].defaultValue;  // Lucene specific
    9393  query.sortfield = filterOptions["SortField"].defaultValue;  // Lucene specific
    94   query.fuzzysearch = (filterOptions["FuzzySearch"].defaultValue == "true");  // Lucene specific
     94  query.fuzziness = filterOptions["Fuzziness"].defaultValue;  // Lucene specific
    9595  query.maxnumeric = maxnumeric;
    9696  OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
     
    121121      query.filterstring = filterOptions["FilterString"].defaultValue;  // Lucene specific
    122122      query.sortfield = filterOptions["SortField"].defaultValue;  // Lucene specific
    123       query.fuzzysearch = (filterOptions["FuzzySearch"].defaultValue == "true");  // Lucene specific
     123      query.fuzziness = filterOptions["Fuzziness"].defaultValue;  // Lucene specific
    124124      query.maxnumeric = maxnumeric;
    125125      // "all", needed when combining queries where the document results are needed
     
    158158    } else if ((*options_here).name == "SortField") {
    159159      query.sortfield = (*options_here).value;
    160     } else if ((*options_here).name == "FuzzySearch") {
    161       query.fuzzysearch = ((*options_here).value == "true");
     160    } else if ((*options_here).name == "Fuzziness") {
     161      query.fuzziness = (*options_here).value;
    162162    } else {
    163163      logout << text_t2ascii
  • trunk/gsdl/src/colservr/queryinfo.cpp

    r12655 r12770  
    4949  filterstring.clear();
    5050  sortfield.clear();
    51   fuzzysearch = 0; // 0 = not fuzzy, 1 = fuzzy
     51  fuzziness.clear();
    5252  startresults = 1; // all
    5353  endresults = 10; // all
     
    7171  filterstring = q.filterstring;
    7272  sortfield = q.sortfield;
    73   fuzzysearch = q.fuzzysearch;
     73  fuzziness = q.fuzziness;
    7474  startresults = q.startresults;
    7575  endresults = q.endresults;
     
    9494      (x.filterstring == y.filterstring) &&
    9595          (x.sortfield == y.sortfield) &&
    96           (x.fuzzysearch == y.fuzzysearch) &&
     96          (x.fuzziness == y.fuzziness) &&
    9797          (x.startresults == y.startresults) &&
    9898          (x.startresults == y.startresults));
     
    123123  outs << " filterstring = \"" << q.filterstring << "\"\n";
    124124  outs << " sortfield = \"" << q.sortfield << "\"\n";
    125   outs << " fuzzysearch = \"" << q.fuzzysearch << "\"\n";
     125  outs << " fuzziness = \"" << q.fuzziness << "\"\n";
    126126  outs << " startresults = \"" << q.startresults << "\"\n";
    127127  outs << " endresults = \"" << q.endresults << "\"\n";
  • trunk/gsdl/src/colservr/queryinfo.h

    r12655 r12770  
    6868  text_t filterstring; // Filter specified (currently only used by Lucene)
    6969  text_t sortfield; // Field to use for sorting result set (currently used by lucene)
    70   int fuzzysearch; // Should search be fuzzy (only used by Lucene)
     70  text_t fuzziness; // Search fuzziness amount between 0.0 and 1.0 (only used by Lucene)
    7171
    7272  int startresults;
  • trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

    r12656 r12770  
    4444    {
    4545    if (args.length == 0) {
    46         System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number]");
     46        System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number]");
    4747        return;
    4848    }
     
    5858        Sort sorter = new Sort();
    5959        Filter filter = null;
    60             boolean fuzzy = false;
     60        String fuzziness = null;
    6161
    6262        // Paging
     
    8686                            default_conjuction_operator = args[i];
    8787                        }
    88                     if (args[i].equals("-fuzzy"))
     88                    if (args[i].equals("-fuzziness"))
    8989                        {
    90                             fuzzy = true;
     90                i++;
     91                fuzziness = args[i];
    9192                        }
    9293            if (args[i].equals("-startresults"))
     
    133134            query_including_stop_words = query_including_stop_words.rewrite(reader);
    134135
    135             Query query = parseQuery(reader, query_parser, query_string, fuzzy);
     136            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
    136137            query = query.rewrite(reader);
    137138
     
    271272
    272273
    273     private static Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, boolean fuzzy)
     274    private static Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    274275    throws java.io.IOException, org.apache.lucene.queryParser.ParseException
    275276    {
     
    304305    // If this is a fuzzy search, then we need to add the fuzzy
    305306    // flag to each of the query terms
    306     if (fuzzy && query.toString().length() > 0)
     307    if (fuzziness != null && query.toString().length() > 0)
    307308        {
    308309        // Revert the query to a string
     
    363364                    {
    364365                    ///ystem.err.println("Yahoo! Found fuzzy term.");
    365                     mutable_query_string.insert(o, '~');
     366                    mutable_query_string.insert(o, '~' + fuzziness);
    366367                    o++;
    367368                    s = 0; // Reset
     
    374375        if (s == 3)
    375376            {
    376             mutable_query_string.append('~');
     377            mutable_query_string.append('~' + fuzziness);
    377378            }
    378379        // Reparse the query
  • trunk/gsdl/src/recpt/queryaction.cpp

    r12768 r12770  
    430430  argsinfo.addarginfo (NULL, arg_ainfo);
    431431
    432   // "fuzzy" controls whether the search is fuzzy logic or not
    433   // (only implemented for Lucene collection... umm.. ever).
    434   arg_ainfo.shortname = "fuzzy";
    435   arg_ainfo.longname = "is this search be fuzzy";
    436   arg_ainfo.multiplechar = false;
    437   arg_ainfo.defaultstatus = cgiarginfo::weak;
    438   arg_ainfo.argdefault = "0";
     432  // "fuzziness" controls how closely the search terms must match
     433  // 1.0 = exact match, 0.1 = very inexact match (only implemented for Lucene)
     434  arg_ainfo.shortname = "fuzziness";
     435  arg_ainfo.longname = "Lucene fuzziness value";
     436  arg_ainfo.multiplechar = true;
     437  arg_ainfo.defaultstatus = cgiarginfo::weak;
     438  arg_ainfo.argdefault = "1.0";
    439439  arg_ainfo.savedarginfo = cgiarginfo::must;
    440440  argsinfo.addarginfo (NULL, arg_ainfo);
  • trunk/gsdl/src/recpt/querytools.cpp

    r12685 r12770  
    153153  }
    154154
    155   // sort field for lucene
    156   option.name = "FuzzySearch";
    157   option.value = (args.getintarg("fuzzy")) ? "true" : "false";
    158   request.filterOptions.push_back (option);
     155  if (!args["fuzziness"].empty()) { // fuzziness value for lucene
     156    option.name = "Fuzziness";
     157    option.value = args["fuzziness"];
     158    request.filterOptions.push_back (option);
     159  }
    159160
    160161  set_more_queryfilter_options (request, args);
Note: See TracChangeset for help on using the changeset viewer.