Changeset 12656
- Timestamp:
- 2006-09-04T10:57:05+12:00 (17 years ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/lucene_query.pl
r12408 r12656 48 48 my $sort_field = shift(@_); 49 49 my $dco = shift(@_); 50 my $start_results = shift(@_); 51 my $end_results = shift(@_); 50 52 my $out_file = shift(@_); 51 53 … … 65 67 if (defined($dco)) { 66 68 $cmd .= " -dco " . $dco; 69 } 70 if (defined($start_results)) { 71 $cmd .= " -startresults " . $start_results; 72 } 73 if (defined($end_results)) { 74 $cmd .= " -endresults " . $end_results; 67 75 } 68 76 if (defined($out_file)) { … … 86 94 my $argc = scalar(@argv); 87 95 if ($argc == 0) { 88 print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [- out out_file]\n";96 print STDERR "Usage: $PROGNAME full-index-dir [query] [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [-out out_file]\n"; 89 97 exit 1; 90 98 } … … 96 104 my $sort_field = undef; 97 105 my $dco = undef; 106 my $start_results = undef; 107 my $end_results = undef; 98 108 my $out_file = undef; 99 109 for (my $i = 0; $i < scalar(@argv); $i++) … … 114 124 $dco = $argv[$i]; 115 125 } 126 elsif ($argv[$i] eq "-startresults") { 127 $i++; 128 $start_results = $argv[$i]; 129 } 130 elsif ($argv[$i] eq "-endresults") { 131 $i++; 132 $end_results = $argv[$i]; 133 } 116 134 elsif ($argv[$i] eq "-out") { 117 135 $i++; … … 123 141 } 124 142 125 open_java_lucene($full_indexdir, $fuzzy, $filter_string, $sort_field, $dco, $ out_file);143 open_java_lucene($full_indexdir, $fuzzy, $filter_string, $sort_field, $dco, $start_results, $end_results, $out_file); 126 144 127 145 if (defined $query) { -
trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java
r12653 r12656 3 3 * @author [email protected] 4 4 * @author [email protected] 5 * @author [email protected] 6 * @author [email protected] 5 7 * @version 6 8 */ … … 11 13 import java.io.*; 12 14 import java.util.*; 15 import java.util.regex.*; 13 16 14 17 import org.apache.lucene.analysis.Analyzer; … … 41 44 { 42 45 if (args.length == 0) { 43 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] ");46 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number]"); 44 47 return; 45 48 } … … 49 52 IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); 50 53 51 // Create one query parser with stop words, and one with none54 // Create one query parser with the standard set of stop words, and one with none 52 55 QueryParser query_parser = new QueryParser("TX", new StandardAnalyzer(stop_words)); 53 56 QueryParser query_parser_no_stop_words = new QueryParser("TX", new StandardAnalyzer(new String[] { })); 54 57 55 58 Sort sorter = new Sort(); 56 QueryFilter filter = null;59 Filter filter = null; 57 60 boolean fuzzy = false; 61 62 // Paging 63 int start_results = 1; 64 int end_results = -1; 58 65 59 66 // New code to allow the default conjunction operator to be … … 70 77 { 71 78 i++; 72 try { 73 filter = new QueryFilter(query_parser.parse(args[i])); 74 } 75 catch (ParseException exception) { 76 exception.printStackTrace(); 77 } 79 80 // Parse up filter 81 filter = parseFilterString(args[i]); 78 82 } 79 83 if (args[i].equals("-dco")) … … 86 90 fuzzy = true; 87 91 } 92 if (args[i].equals("-startresults")) 93 { 94 i++; 95 if (args[i].matches("\\d+")) 96 { 97 start_results = Integer.parseInt(args[i]); 98 } 99 } 100 if (args[i].equals("-endresults")) 101 { 102 i++; 103 if (args[i].matches("\\d+")) 104 { 105 end_results = Integer.parseInt(args[i]); 106 } 107 } 88 108 } 89 109 … … 104 124 System.out.println("<ResultSet>"); 105 125 System.out.println(" <QueryString>" + query_string + "</QueryString>"); 126 if (filter != null) 127 { 128 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 129 } 106 130 107 131 try { … … 139 163 // Do we need to use a hit iterator to get sorted results? 140 164 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 165 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 166 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 167 168 int counter = 1; 141 169 Iterator hit_iter = hits.iterator(); 142 170 while (hit_iter.hasNext()) … … 145 173 Document doc = hit.getDocument(); 146 174 String node_id = doc.get("nodeID"); 147 System.out.println(" <Match id=\"" + node_id + "\" />"); 148 175 176 // May not be paging results 177 if (start_results == 1 && end_results == -1) 178 { 179 System.out.println(" <Match id=\"" + node_id + "\" />"); 180 } 181 // Otherwise skip up until page offset 182 else if (start_results <= counter && counter <= end_results) 183 { 184 System.out.println(" <Match id=\"" + node_id + "\" />"); 185 } 186 // And skip all the rest 187 149 188 // From the document, extract the Term Vector for the 150 189 // TX field … … 182 221 ///ystem.err.println("Error! Missing term vector for document " + hit.getId()); 183 222 } 223 ++counter; 184 224 } 185 225 … … 347 387 return query; 348 388 } 389 390 391 /** 392 * @todo Michael to comment 393 */ 394 private static Filter parseFilterString(String filter_string) 395 { 396 Filter result = null; 397 Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*"); 398 Matcher matcher = pattern.matcher(filter_string); 399 if (matcher.matches()) 400 { 401 String field_name = matcher.group(1); 402 boolean include_lower = matcher.group(2).equals("["); 403 String lower_term = matcher.group(3); 404 String upper_term = matcher.group(4); 405 boolean include_upper = matcher.group(5).equals("]"); 406 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 407 } 408 else 409 { 410 System.err.println("Error: Could not understand filter string \"" + filter_string + "\""); 411 } 412 return result; 413 } 414 /** parseFilterString() **/ 349 415 } -
trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r12653 r12656 3 3 * @author [email protected] 4 4 * @author [email protected] 5 * @author [email protected] 6 * @author [email protected] 5 7 * @version 6 8 */ … … 11 13 import java.io.*; 12 14 import java.util.*; 15 import java.util.regex.*; 13 16 14 17 import org.apache.lucene.analysis.Analyzer; … … 41 44 { 42 45 if (args.length == 0) { 43 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] ");46 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzzy] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number]"); 44 47 return; 45 48 } … … 49 52 IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); 50 53 51 // Create one query parser with stop words, and one with none54 // Create one query parser with the standard set of stop words, and one with none 52 55 QueryParser query_parser = new QueryParser("TX", new StandardAnalyzer(stop_words)); 53 56 QueryParser query_parser_no_stop_words = new QueryParser("TX", new StandardAnalyzer(new String[] { })); 54 57 55 58 Sort sorter = new Sort(); 56 QueryFilter filter = null;59 Filter filter = null; 57 60 boolean fuzzy = false; 61 62 // Paging 63 int start_results = 1; 64 int end_results = -1; 58 65 59 66 // New code to allow the default conjunction operator to be … … 70 77 { 71 78 i++; 72 try { 73 filter = new QueryFilter(query_parser.parse(args[i])); 74 } 75 catch (ParseException exception) { 76 exception.printStackTrace(); 77 } 79 80 // Parse up filter 81 filter = parseFilterString(args[i]); 78 82 } 79 83 if (args[i].equals("-dco")) … … 86 90 fuzzy = true; 87 91 } 92 if (args[i].equals("-startresults")) 93 { 94 i++; 95 if (args[i].matches("\\d+")) 96 { 97 start_results = Integer.parseInt(args[i]); 98 } 99 } 100 if (args[i].equals("-endresults")) 101 { 102 i++; 103 if (args[i].matches("\\d+")) 104 { 105 end_results = Integer.parseInt(args[i]); 106 } 107 } 88 108 } 89 109 … … 104 124 System.out.println("<ResultSet>"); 105 125 System.out.println(" <QueryString>" + query_string + "</QueryString>"); 126 if (filter != null) 127 { 128 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 129 } 106 130 107 131 try { … … 139 163 // Do we need to use a hit iterator to get sorted results? 140 164 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 165 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 166 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 167 168 int counter = 1; 141 169 Iterator hit_iter = hits.iterator(); 142 170 while (hit_iter.hasNext()) … … 145 173 Document doc = hit.getDocument(); 146 174 String node_id = doc.get("nodeID"); 147 System.out.println(" <Match id=\"" + node_id + "\" />"); 148 175 176 // May not be paging results 177 if (start_results == 1 && end_results == -1) 178 { 179 System.out.println(" <Match id=\"" + node_id + "\" />"); 180 } 181 // Otherwise skip up until page offset 182 else if (start_results <= counter && counter <= end_results) 183 { 184 System.out.println(" <Match id=\"" + node_id + "\" />"); 185 } 186 // And skip all the rest 187 149 188 // From the document, extract the Term Vector for the 150 189 // TX field … … 182 221 ///ystem.err.println("Error! Missing term vector for document " + hit.getId()); 183 222 } 223 ++counter; 184 224 } 185 225 … … 347 387 return query; 348 388 } 389 390 391 /** 392 * @todo Michael to comment 393 */ 394 private static Filter parseFilterString(String filter_string) 395 { 396 Filter result = null; 397 Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*"); 398 Matcher matcher = pattern.matcher(filter_string); 399 if (matcher.matches()) 400 { 401 String field_name = matcher.group(1); 402 boolean include_lower = matcher.group(2).equals("["); 403 String lower_term = matcher.group(3); 404 String upper_term = matcher.group(4); 405 boolean include_upper = matcher.group(5).equals("]"); 406 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 407 } 408 else 409 { 410 System.err.println("Error: Could not understand filter string \"" + filter_string + "\""); 411 } 412 return result; 413 } 414 /** parseFilterString() **/ 349 415 }
Note:
See TracChangeset
for help on using the changeset viewer.