Changeset 12408


Ignore:
Timestamp:
2006-08-07T15:24:47+12:00 (18 years ago)
Author:
mdewsnip
Message:

Added a "-filter" option which can currently be used for specifying range filters (e.g. we're going to use it for dates). Many thanks to Me and DL Consulting Ltd.

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/lucene_query.pl

    r12373 r12408  
    4545    my $full_indexdir = shift(@_);
    4646    my $fuzzy = shift(@_);
     47    my $filter_string = shift(@_);
    4748    my $sort_field = shift(@_);
    4849    my $dco = shift(@_);
     
    5556    if (defined($fuzzy)) {
    5657        $cmd .= " -fuzzy";
     58    }
     59    if (defined($filter_string)) {
     60    $cmd .= " -filter \"" . $filter_string . "\"";
    5761    }
    5862    if (defined($sort_field)) {
     
    8286    my $argc = scalar(@argv);
    8387    if ($argc == 0) {
    84     print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzzy] [-sort sort_field] [-dco AND|OR] [-out out_file]\n";
     88    print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-out out_file]\n";
    8589    exit 1;
    8690    }
     
    8993    my $query = undef;
    9094    my $fuzzy = undef;
     95    my $filter_string = undef;
    9196    my $sort_field = undef;
    9297    my $dco = undef;
     
    96101    if ($argv[$i] eq "-fuzzy") {
    97102        $fuzzy = 1;
     103    }
     104        elsif ($argv[$i] eq "-filter") {
     105            $i++;
     106            $filter_string = $argv[$i];
    98107    }
    99108        elsif ($argv[$i] eq "-sort") {
     
    114123    }
    115124
    116     open_java_lucene($full_indexdir, $fuzzy, $sort_field, $dco, $out_file);
     125    open_java_lucene($full_indexdir, $fuzzy, $filter_string, $sort_field, $dco, $out_file);
    117126
    118127    if (defined $query) {
  • trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

    r12390 r12408  
    2424import org.apache.lucene.index.TermFreqVector;
    2525import org.apache.lucene.queryParser.QueryParser;
     26import org.apache.lucene.search.Filter;
    2627import org.apache.lucene.search.Hit;
    2728import org.apache.lucene.search.Hits;
    2829import org.apache.lucene.search.IndexSearcher;
    2930import org.apache.lucene.search.Query;
     31import org.apache.lucene.search.RangeFilter;
    3032import org.apache.lucene.search.Searcher;
    3133import org.apache.lucene.search.Sort;
     
    4648
    4749        Sort sorter = new Sort();
     50        Filter filter = null;
    4851            boolean fuzzy = false;
    4952
     
    5861                            ///ystem.err.println("**** sort by = " + args[i]);
    5962                            sorter = new Sort(args[i]);
     63                        }
     64                    if (args[i].equals("-filter"))
     65                        {
     66                            i++;
     67                filter = parseFilterString(args[i]);
    6068                        }
    6169                    if (args[i].equals("-dco"))
     
    211219
    212220        // Perform the query
    213         Hits hits = searcher.search(query, sorter);
     221        Hits hits;
     222        if (filter != null) {
     223            hits = searcher.search(query, filter, sorter);
     224        }
     225        else {
     226            hits = searcher.search(query, sorter);
     227        }
    214228        System.out.println("<ResultSet>");
    215229        System.out.println("  <QueryString>" + query_string + "</QueryString>");
     
    244258                        // TX field
    245259                        TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
    246                         if (term_freq_vector.size() > 0)
     260                        if (term_freq_vector != null && term_freq_vector.size() > 0)
    247261                            {
    248262                                int[] term_frequencies = term_freq_vector.getTermFrequencies();
     
    316330        }
    317331    }
     332
     333
     334    private static Filter parseFilterString(String filter_string)
     335    {
     336    // Range filters
     337    if (filter_string.matches("(.*):[\\{\\[](.+) TO (.+)[\\}\\]]")) {
     338        String field_name = filter_string.substring(0, filter_string.indexOf(":"));
     339        boolean include_lower = (filter_string.charAt(filter_string.indexOf(":") + 1) == '[');
     340        String lower_term = filter_string.substring(filter_string.indexOf(":") + 2, filter_string.indexOf(" TO "));
     341        String upper_term = filter_string.substring(filter_string.indexOf(" TO ") + " TO ".length(), filter_string.length() - 1);
     342        boolean include_upper = (filter_string.charAt(filter_string.length() - 1) == ']');
     343        return new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
     344    }
     345
     346    System.err.println("Error: Could not understand filter string \"" + filter_string + "\"");
     347    return null;
     348    }
    318349}
  • trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r12390 r12408  
    2424import org.apache.lucene.index.TermFreqVector;
    2525import org.apache.lucene.queryParser.QueryParser;
     26import org.apache.lucene.search.Filter;
    2627import org.apache.lucene.search.Hit;
    2728import org.apache.lucene.search.Hits;
    2829import org.apache.lucene.search.IndexSearcher;
    2930import org.apache.lucene.search.Query;
     31import org.apache.lucene.search.RangeFilter;
    3032import org.apache.lucene.search.Searcher;
    3133import org.apache.lucene.search.Sort;
     
    4648
    4749        Sort sorter = new Sort();
     50        Filter filter = null;
    4851            boolean fuzzy = false;
    4952
     
    5861                            ///ystem.err.println("**** sort by = " + args[i]);
    5962                            sorter = new Sort(args[i]);
     63                        }
     64                    if (args[i].equals("-filter"))
     65                        {
     66                            i++;
     67                filter = parseFilterString(args[i]);
    6068                        }
    6169                    if (args[i].equals("-dco"))
     
    211219
    212220        // Perform the query
    213         Hits hits = searcher.search(query, sorter);
     221        Hits hits;
     222        if (filter != null) {
     223            hits = searcher.search(query, filter, sorter);
     224        }
     225        else {
     226            hits = searcher.search(query, sorter);
     227        }
    214228        System.out.println("<ResultSet>");
    215229        System.out.println("  <QueryString>" + query_string + "</QueryString>");
     
    244258                        // TX field
    245259                        TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
    246                         if (term_freq_vector.size() > 0)
     260                        if (term_freq_vector != null && term_freq_vector.size() > 0)
    247261                            {
    248262                                int[] term_frequencies = term_freq_vector.getTermFrequencies();
     
    316330        }
    317331    }
     332
     333
     334    private static Filter parseFilterString(String filter_string)
     335    {
     336    // Range filters
     337    if (filter_string.matches("(.*):[\\{\\[](.+) TO (.+)[\\}\\]]")) {
     338        String field_name = filter_string.substring(0, filter_string.indexOf(":"));
     339        boolean include_lower = (filter_string.charAt(filter_string.indexOf(":") + 1) == '[');
     340        String lower_term = filter_string.substring(filter_string.indexOf(":") + 2, filter_string.indexOf(" TO "));
     341        String upper_term = filter_string.substring(filter_string.indexOf(" TO ") + " TO ".length(), filter_string.length() - 1);
     342        boolean include_upper = (filter_string.charAt(filter_string.length() - 1) == ']');
     343        return new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
     344    }
     345
     346    System.err.println("Error: Could not understand filter string \"" + filter_string + "\"");
     347    return null;
     348    }
    318349}
Note: See TracChangeset for help on using the changeset viewer.