Changeset 12772
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java
r12770 r12772 67 67 // definable 68 68 String default_conjuction_operator = "OR"; 69 for (int i = 1; i < args.length; i++) 70 { 71 if (args[i].equals("-sort")) 72 { 73 i++; 74 sorter = new Sort(args[i]); 75 } 76 if (args[i].equals("-filter")) 77 { 78 i++; 79 80 // Parse up filter 81 filter = parseFilterString(args[i]); 82 } 83 if (args[i].equals("-dco")) 84 { 85 i++; 86 default_conjuction_operator = args[i]; 87 } 88 if (args[i].equals("-fuzziness")) 89 { 90 i++; 91 fuzziness = args[i]; 92 } 93 if (args[i].equals("-startresults")) 94 { 95 i++; 96 if (args[i].matches("\\d+")) 97 { 98 start_results = Integer.parseInt(args[i]); 99 } 100 } 101 if (args[i].equals("-endresults")) 102 { 103 i++; 104 if (args[i].matches("\\d+")) 105 { 106 end_results = Integer.parseInt(args[i]); 107 } 108 } 109 } 110 69 for (int i = 1; i < args.length; i++) { 70 if (args[i].equals("-sort")) { 71 i++; 72 sorter = new Sort(args[i]); 73 } 74 if (args[i].equals("-filter")) { 75 i++; 76 77 // Parse up filter 78 filter = parseFilterString(args[i]); 79 } 80 if (args[i].equals("-dco")) { 81 i++; 82 default_conjuction_operator = args[i]; 83 } 84 if (args[i].equals("-fuzziness")) { 85 i++; 86 fuzziness = args[i]; 87 } 88 if (args[i].equals("-startresults")) { 89 i++; 90 if (args[i].matches("\\d+")) { 91 start_results = Integer.parseInt(args[i]); 92 } 93 } 94 if (args[i].equals("-endresults")) { 95 i++; 96 if (args[i].matches("\\d+")) { 97 end_results = Integer.parseInt(args[i]); 98 } 99 } 100 } 101 111 102 // Lucene does "OR" queries by default; do an "AND" query if specified 112 if (default_conjuction_operator.equals("AND")) 113 { 114 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); 115 query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR); 116 } 103 if (default_conjuction_operator.equals("AND")) { 104 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); 105 query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR); 106 } 117 107 118 108 
BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); … … 125 115 System.out.println("<ResultSet>"); 126 116 System.out.println(" <QueryString>" + query_string + "</QueryString>"); 127 if (filter != null) 128 { 129 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 130 } 117 if (filter != null) { 118 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 119 } 131 120 132 121 try { … … 152 141 query.extractTerms(terms); 153 142 Iterator iter = terms.iterator(); 154 while (iter.hasNext()) 155 { 156 Term term = (Term) iter.next(); 157 // If you wanted to limit this to just TX terms add 158 // something like this: 159 //if (term.field().equals("TX")) 160 term_counts.put(term.text(), new Integer(0)); 161 term_fields.put(term.text(), term.field()); 162 } 143 while (iter.hasNext()) { 144 Term term = (Term) iter.next(); 145 // If you wanted to limit this to just TX terms add 146 // something like this: 147 //if (term.field().equals("TX")) 148 term_counts.put(term.text(), new Integer(0)); 149 term_fields.put(term.text(), term.field()); 150 } 163 151 164 152 // Do we need to use a hit iterator to get sorted results? 
… … 169 157 int counter = 1; 170 158 Iterator hit_iter = hits.iterator(); 171 while (hit_iter.hasNext()) 172 { 173 Hit hit = (Hit) hit_iter.next(); 174 Document doc = hit.getDocument(); 175 String node_id = doc.get("nodeID"); 176 177 // May not be paging results 178 if (start_results == 1 && end_results == -1) 179 { 180 System.out.println(" <Match id=\"" + node_id + "\" />"); 159 while (hit_iter.hasNext()) { 160 Hit hit = (Hit) hit_iter.next(); 161 Document doc = hit.getDocument(); 162 String node_id = doc.get("nodeID"); 163 164 // May not be paging results 165 if (start_results == 1 && end_results == -1) { 166 System.out.println(" <Match id=\"" + node_id + "\" />"); 167 } 168 // Otherwise skip up until page offset 169 else if (start_results <= counter && counter <= end_results) { 170 System.out.println(" <Match id=\"" + node_id + "\" />"); 171 } 172 // And skip all the rest 173 174 // From the document, extract the Term Vector for the 175 // TX field 176 TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX"); 177 if (term_freq_vector != null && term_freq_vector.size() > 0) { 178 int[] term_frequencies = term_freq_vector.getTermFrequencies(); 179 // Now for each query term, determine the 180 // frequency - which may of course be 0. 
181 Set term_counts_set = term_counts.keySet(); 182 Iterator terms_iter = term_counts_set.iterator(); 183 while (terms_iter.hasNext()) { 184 185 String term = (String) terms_iter.next(); 186 Integer count_integer = (Integer) term_counts.get(term); 187 int count = count_integer.intValue(); 188 int index = term_freq_vector.indexOf(term); 189 // If the term has a count, then add to 190 // the total count for this term 191 if (index != -1) { 192 count += term_frequencies[index]; 181 193 } 182 // Otherwise skip up until page offset 183 else if (start_results <= counter && counter <= end_results) 184 { 185 System.out.println(" <Match id=\"" + node_id + "\" />"); 186 } 187 // And skip all the rest 188 189 // From the document, extract the Term Vector for the 190 // TX field 191 TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX"); 192 if (term_freq_vector != null && term_freq_vector.size() > 0) 193 { 194 int[] term_frequencies = term_freq_vector.getTermFrequencies(); 195 // Now for each query term, determine the 196 // frequency - which may of course be 0. 197 Set term_counts_set = term_counts.keySet(); 198 Iterator terms_iter = term_counts_set.iterator(); 199 while (terms_iter.hasNext()) 200 { 201 String term = (String) terms_iter.next(); 202 Integer count_integer = (Integer) term_counts.get(term); 203 int count = count_integer.intValue(); 204 int index = term_freq_vector.indexOf(term); 205 // If the term has a count, then add to 206 // the total count for this term 207 if (index != -1) 208 { 209 count += term_frequencies[index]; 210 211 } 212 // Store the result 213 term_counts.put(term, new Integer(count)); 214 count_integer = null; 215 term = null; 216 } 217 terms_iter = null; 218 term_counts_set = null; 219 } 220 else 221 { 222 ///ystem.err.println("Error! 
Missing term vector for document " + hit.getId()); 223 } 224 ++counter; 225 } 226 194 // Store the result 195 term_counts.put(term, new Integer(count)); 196 count_integer = null; 197 term = null; 198 } 199 terms_iter = null; 200 term_counts_set = null; 201 } 202 else { 203 ///ystem.err.println("Error! Missing term vector for document " + hit.getId()); 204 } 205 ++counter; 206 } 207 227 208 // Retrieve all the useful terms 228 209 Set term_counts_set = term_counts.keySet(); … … 230 211 // Iterate over them 231 212 Iterator terms_iter = term_counts_set.iterator(); 232 while (terms_iter.hasNext()) 233 { 234 String term = (String) terms_iter.next(); 235 Integer count = (Integer) term_counts.get(term); 236 String field = (String) term_fields.get(term); 237 System.out.println(" <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />"); 238 count = null; 239 term = null; 240 } 213 while (terms_iter.hasNext()) { 214 String term = (String) terms_iter.next(); 215 Integer count = (Integer) term_counts.get(term); 216 String field = (String) term_fields.get(term); 217 System.out.println(" <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />"); 218 count = null; 219 term = null; 220 } 241 221 // Cleanup 242 222 terms_iter = null; … … 281 261 int paren_count = 0; 282 262 boolean seen_paren = false; 283 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) 284 { 285 if (query_string.charAt(offset) == '(') 286 { 287 paren_count++; 288 seen_paren = true; 289 } 290 if (query_string.charAt(offset) == ')') 291 { 292 paren_count--; 293 } 294 offset++; 295 } 263 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) { 264 if (query_string.charAt(offset) == '(') { 265 paren_count++; 266 seen_paren = true; 267 } 268 if (query_string.charAt(offset) == ')') { 269 paren_count--; 270 } 271 offset++; 272 } 296 273 String query_prefix = query_string.substring(0, offset); 297 274 String 
query_suffix = query_string.substring(offset); … … 305 282 // If this is a fuzzy search, then we need to add the fuzzy 306 283 // flag to each of the query terms 307 if (fuzziness != null && query.toString().length() > 0) 308 { 309 // Revert the query to a string 310 System.err.println("Rewritten query: " + query.toString()); 311 // Search through the string for TX:<term> query terms 312 // and append the ~ operator. Not that this search will 313 // not change phrase searches (TX:"<term> <term>") as 314 // fuzzy searching is not possible for these entries. 315 // Yahoo! Time for a state machine! 316 StringBuffer mutable_query_string = new StringBuffer(query.toString()); 317 int o = 0; // Offset 318 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX: 319 int s = 0; // State 320 while(o < mutable_query_string.length()) 321 { 322 char c = mutable_query_string.charAt(o); 323 if (s == 0 && c == 'T') 324 { 325 ///ystem.err.println("Found T!"); 326 s = 1; 327 } 328 else if (s == 1) 329 { 330 if (c == 'X') 331 { 332 ///ystem.err.println("Found X!"); 333 s = 2; 334 } 335 else 336 { 337 s = 0; // Reset 338 } 339 } 340 else if (s == 2) 341 { 342 if (c == ':') 343 { 344 ///ystem.err.println("Found TX:!"); 345 s = 3; 346 } 347 else 348 { 349 s = 0; // Reset 350 } 351 } 352 else if (s == 3) 353 { 354 // Don't process phrases 355 if (c == '"') 356 { 357 ///ystem.err.println("Stupid phrase..."); 358 s = 0; // Reset 359 } 360 // Found the end of the term... add the 361 // fuzzy search indicator 362 // Nor outside the scope of parentheses 363 else if (Character.isWhitespace(c) || c == ')') 364 { 365 ///ystem.err.println("Yahoo! 
Found fuzzy term."); 366 mutable_query_string.insert(o, '~' + fuzziness); 367 o++; 368 s = 0; // Reset 369 } 370 } 284 if (fuzziness != null && query.toString().length() > 0) { 285 // Revert the query to a string 286 System.err.println("Rewritten query: " + query.toString()); 287 // Search through the string for TX:<term> query terms 288 // and append the ~ operator. Not that this search will 289 // not change phrase searches (TX:"<term> <term>") as 290 // fuzzy searching is not possible for these entries. 291 // Yahoo! Time for a state machine! 292 StringBuffer mutable_query_string = new StringBuffer(query.toString()); 293 int o = 0; // Offset 294 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX: 295 int s = 0; // State 296 while (o < mutable_query_string.length()) { 297 char c = mutable_query_string.charAt(o); 298 if (s == 0 && c == 'T') { 299 ///ystem.err.println("Found T!"); 300 s = 1; 301 } 302 else if (s == 1) { 303 if (c == 'X') { 304 ///ystem.err.println("Found X!"); 305 s = 2; 306 } 307 else { 308 s = 0; // Reset 309 } 310 } 311 else if (s == 2) { 312 if (c == ':') { 313 ///ystem.err.println("Found TX:!"); 314 s = 3; 315 } 316 else { 317 s = 0; // Reset 318 } 319 } 320 else if (s == 3) { 321 // Don't process phrases 322 if (c == '"') { 323 ///ystem.err.println("Stupid phrase..."); 324 s = 0; // Reset 325 } 326 // Found the end of the term... add the 327 // fuzzy search indicator 328 // Nor outside the scope of parentheses 329 else if (Character.isWhitespace(c) || c == ')') { 330 ///ystem.err.println("Yahoo! Found fuzzy term."); 331 mutable_query_string.insert(o, '~' + fuzziness); 371 332 o++; 372 } 373 // If we were in the state of looking for the end of a 374 // term - then we just found it! 
375 if (s == 3) 376 { 377 mutable_query_string.append('~' + fuzziness); 378 } 379 // Reparse the query 380 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix); 381 query = query_parser.parse(mutable_query_string.toString() + query_suffix); 382 } 383 else 384 { 385 query = query_parser.parse(query_prefix + query_suffix); 386 } 387 333 s = 0; // Reset 334 } 335 } 336 o++; 337 } 338 // If we were in the state of looking for the end of a 339 // term - then we just found it! 340 if (s == 3) { 341 mutable_query_string.append('~' + fuzziness); 342 } 343 // Reparse the query 344 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix); 345 query = query_parser.parse(mutable_query_string.toString() + query_suffix); 346 } 347 else { 348 query = query_parser.parse(query_prefix + query_suffix); 349 } 350 388 351 return query; 389 352 } … … 398 361 Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*"); 399 362 Matcher matcher = pattern.matcher(filter_string); 400 if (matcher.matches()) 401 { 363 if (matcher.matches()) { 402 364 String field_name = matcher.group(1); 403 365 boolean include_lower = matcher.group(2).equals("["); … … 406 368 boolean include_upper = matcher.group(5).equals("]"); 407 369 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 408 } 409 else 410 { 370 } 371 else { 411 372 System.err.println("Error: Could not understand filter string \"" + filter_string + "\""); 412 373 } 413 374 return result; 414 375 } -
trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r12770 r12772 67 67 // definable 68 68 String default_conjuction_operator = "OR"; 69 for (int i = 1; i < args.length; i++) 70 { 71 if (args[i].equals("-sort")) 72 { 73 i++; 74 sorter = new Sort(args[i]); 75 } 76 if (args[i].equals("-filter")) 77 { 78 i++; 79 80 // Parse up filter 81 filter = parseFilterString(args[i]); 82 } 83 if (args[i].equals("-dco")) 84 { 85 i++; 86 default_conjuction_operator = args[i]; 87 } 88 if (args[i].equals("-fuzziness")) 89 { 90 i++; 91 fuzziness = args[i]; 92 } 93 if (args[i].equals("-startresults")) 94 { 95 i++; 96 if (args[i].matches("\\d+")) 97 { 98 start_results = Integer.parseInt(args[i]); 99 } 100 } 101 if (args[i].equals("-endresults")) 102 { 103 i++; 104 if (args[i].matches("\\d+")) 105 { 106 end_results = Integer.parseInt(args[i]); 107 } 108 } 109 } 110 69 for (int i = 1; i < args.length; i++) { 70 if (args[i].equals("-sort")) { 71 i++; 72 sorter = new Sort(args[i]); 73 } 74 if (args[i].equals("-filter")) { 75 i++; 76 77 // Parse up filter 78 filter = parseFilterString(args[i]); 79 } 80 if (args[i].equals("-dco")) { 81 i++; 82 default_conjuction_operator = args[i]; 83 } 84 if (args[i].equals("-fuzziness")) { 85 i++; 86 fuzziness = args[i]; 87 } 88 if (args[i].equals("-startresults")) { 89 i++; 90 if (args[i].matches("\\d+")) { 91 start_results = Integer.parseInt(args[i]); 92 } 93 } 94 if (args[i].equals("-endresults")) { 95 i++; 96 if (args[i].matches("\\d+")) { 97 end_results = Integer.parseInt(args[i]); 98 } 99 } 100 } 101 111 102 // Lucene does "OR" queries by default; do an "AND" query if specified 112 if (default_conjuction_operator.equals("AND")) 113 { 114 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); 115 query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR); 116 } 103 if (default_conjuction_operator.equals("AND")) { 104 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); 105 query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR); 106 } 117 107 118 108 
BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); … … 125 115 System.out.println("<ResultSet>"); 126 116 System.out.println(" <QueryString>" + query_string + "</QueryString>"); 127 if (filter != null) 128 { 129 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 130 } 117 if (filter != null) { 118 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 119 } 131 120 132 121 try { … … 152 141 query.extractTerms(terms); 153 142 Iterator iter = terms.iterator(); 154 while (iter.hasNext()) 155 { 156 Term term = (Term) iter.next(); 157 // If you wanted to limit this to just TX terms add 158 // something like this: 159 //if (term.field().equals("TX")) 160 term_counts.put(term.text(), new Integer(0)); 161 term_fields.put(term.text(), term.field()); 162 } 143 while (iter.hasNext()) { 144 Term term = (Term) iter.next(); 145 // If you wanted to limit this to just TX terms add 146 // something like this: 147 //if (term.field().equals("TX")) 148 term_counts.put(term.text(), new Integer(0)); 149 term_fields.put(term.text(), term.field()); 150 } 163 151 164 152 // Do we need to use a hit iterator to get sorted results? 
… … 169 157 int counter = 1; 170 158 Iterator hit_iter = hits.iterator(); 171 while (hit_iter.hasNext()) 172 { 173 Hit hit = (Hit) hit_iter.next(); 174 Document doc = hit.getDocument(); 175 String node_id = doc.get("nodeID"); 176 177 // May not be paging results 178 if (start_results == 1 && end_results == -1) 179 { 180 System.out.println(" <Match id=\"" + node_id + "\" />"); 159 while (hit_iter.hasNext()) { 160 Hit hit = (Hit) hit_iter.next(); 161 Document doc = hit.getDocument(); 162 String node_id = doc.get("nodeID"); 163 164 // May not be paging results 165 if (start_results == 1 && end_results == -1) { 166 System.out.println(" <Match id=\"" + node_id + "\" />"); 167 } 168 // Otherwise skip up until page offset 169 else if (start_results <= counter && counter <= end_results) { 170 System.out.println(" <Match id=\"" + node_id + "\" />"); 171 } 172 // And skip all the rest 173 174 // From the document, extract the Term Vector for the 175 // TX field 176 TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX"); 177 if (term_freq_vector != null && term_freq_vector.size() > 0) { 178 int[] term_frequencies = term_freq_vector.getTermFrequencies(); 179 // Now for each query term, determine the 180 // frequency - which may of course be 0. 
181 Set term_counts_set = term_counts.keySet(); 182 Iterator terms_iter = term_counts_set.iterator(); 183 while (terms_iter.hasNext()) { 184 185 String term = (String) terms_iter.next(); 186 Integer count_integer = (Integer) term_counts.get(term); 187 int count = count_integer.intValue(); 188 int index = term_freq_vector.indexOf(term); 189 // If the term has a count, then add to 190 // the total count for this term 191 if (index != -1) { 192 count += term_frequencies[index]; 181 193 } 182 // Otherwise skip up until page offset 183 else if (start_results <= counter && counter <= end_results) 184 { 185 System.out.println(" <Match id=\"" + node_id + "\" />"); 186 } 187 // And skip all the rest 188 189 // From the document, extract the Term Vector for the 190 // TX field 191 TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX"); 192 if (term_freq_vector != null && term_freq_vector.size() > 0) 193 { 194 int[] term_frequencies = term_freq_vector.getTermFrequencies(); 195 // Now for each query term, determine the 196 // frequency - which may of course be 0. 197 Set term_counts_set = term_counts.keySet(); 198 Iterator terms_iter = term_counts_set.iterator(); 199 while (terms_iter.hasNext()) 200 { 201 String term = (String) terms_iter.next(); 202 Integer count_integer = (Integer) term_counts.get(term); 203 int count = count_integer.intValue(); 204 int index = term_freq_vector.indexOf(term); 205 // If the term has a count, then add to 206 // the total count for this term 207 if (index != -1) 208 { 209 count += term_frequencies[index]; 210 211 } 212 // Store the result 213 term_counts.put(term, new Integer(count)); 214 count_integer = null; 215 term = null; 216 } 217 terms_iter = null; 218 term_counts_set = null; 219 } 220 else 221 { 222 ///ystem.err.println("Error! 
Missing term vector for document " + hit.getId()); 223 } 224 ++counter; 225 } 226 194 // Store the result 195 term_counts.put(term, new Integer(count)); 196 count_integer = null; 197 term = null; 198 } 199 terms_iter = null; 200 term_counts_set = null; 201 } 202 else { 203 ///ystem.err.println("Error! Missing term vector for document " + hit.getId()); 204 } 205 ++counter; 206 } 207 227 208 // Retrieve all the useful terms 228 209 Set term_counts_set = term_counts.keySet(); … … 230 211 // Iterate over them 231 212 Iterator terms_iter = term_counts_set.iterator(); 232 while (terms_iter.hasNext()) 233 { 234 String term = (String) terms_iter.next(); 235 Integer count = (Integer) term_counts.get(term); 236 String field = (String) term_fields.get(term); 237 System.out.println(" <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />"); 238 count = null; 239 term = null; 240 } 213 while (terms_iter.hasNext()) { 214 String term = (String) terms_iter.next(); 215 Integer count = (Integer) term_counts.get(term); 216 String field = (String) term_fields.get(term); 217 System.out.println(" <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />"); 218 count = null; 219 term = null; 220 } 241 221 // Cleanup 242 222 terms_iter = null; … … 281 261 int paren_count = 0; 282 262 boolean seen_paren = false; 283 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) 284 { 285 if (query_string.charAt(offset) == '(') 286 { 287 paren_count++; 288 seen_paren = true; 289 } 290 if (query_string.charAt(offset) == ')') 291 { 292 paren_count--; 293 } 294 offset++; 295 } 263 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) { 264 if (query_string.charAt(offset) == '(') { 265 paren_count++; 266 seen_paren = true; 267 } 268 if (query_string.charAt(offset) == ')') { 269 paren_count--; 270 } 271 offset++; 272 } 296 273 String query_prefix = query_string.substring(0, offset); 297 274 String 
query_suffix = query_string.substring(offset); … … 305 282 // If this is a fuzzy search, then we need to add the fuzzy 306 283 // flag to each of the query terms 307 if (fuzziness != null && query.toString().length() > 0) 308 { 309 // Revert the query to a string 310 System.err.println("Rewritten query: " + query.toString()); 311 // Search through the string for TX:<term> query terms 312 // and append the ~ operator. Not that this search will 313 // not change phrase searches (TX:"<term> <term>") as 314 // fuzzy searching is not possible for these entries. 315 // Yahoo! Time for a state machine! 316 StringBuffer mutable_query_string = new StringBuffer(query.toString()); 317 int o = 0; // Offset 318 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX: 319 int s = 0; // State 320 while(o < mutable_query_string.length()) 321 { 322 char c = mutable_query_string.charAt(o); 323 if (s == 0 && c == 'T') 324 { 325 ///ystem.err.println("Found T!"); 326 s = 1; 327 } 328 else if (s == 1) 329 { 330 if (c == 'X') 331 { 332 ///ystem.err.println("Found X!"); 333 s = 2; 334 } 335 else 336 { 337 s = 0; // Reset 338 } 339 } 340 else if (s == 2) 341 { 342 if (c == ':') 343 { 344 ///ystem.err.println("Found TX:!"); 345 s = 3; 346 } 347 else 348 { 349 s = 0; // Reset 350 } 351 } 352 else if (s == 3) 353 { 354 // Don't process phrases 355 if (c == '"') 356 { 357 ///ystem.err.println("Stupid phrase..."); 358 s = 0; // Reset 359 } 360 // Found the end of the term... add the 361 // fuzzy search indicator 362 // Nor outside the scope of parentheses 363 else if (Character.isWhitespace(c) || c == ')') 364 { 365 ///ystem.err.println("Yahoo! 
Found fuzzy term."); 366 mutable_query_string.insert(o, '~' + fuzziness); 367 o++; 368 s = 0; // Reset 369 } 370 } 284 if (fuzziness != null && query.toString().length() > 0) { 285 // Revert the query to a string 286 System.err.println("Rewritten query: " + query.toString()); 287 // Search through the string for TX:<term> query terms 288 // and append the ~ operator. Not that this search will 289 // not change phrase searches (TX:"<term> <term>") as 290 // fuzzy searching is not possible for these entries. 291 // Yahoo! Time for a state machine! 292 StringBuffer mutable_query_string = new StringBuffer(query.toString()); 293 int o = 0; // Offset 294 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX: 295 int s = 0; // State 296 while (o < mutable_query_string.length()) { 297 char c = mutable_query_string.charAt(o); 298 if (s == 0 && c == 'T') { 299 ///ystem.err.println("Found T!"); 300 s = 1; 301 } 302 else if (s == 1) { 303 if (c == 'X') { 304 ///ystem.err.println("Found X!"); 305 s = 2; 306 } 307 else { 308 s = 0; // Reset 309 } 310 } 311 else if (s == 2) { 312 if (c == ':') { 313 ///ystem.err.println("Found TX:!"); 314 s = 3; 315 } 316 else { 317 s = 0; // Reset 318 } 319 } 320 else if (s == 3) { 321 // Don't process phrases 322 if (c == '"') { 323 ///ystem.err.println("Stupid phrase..."); 324 s = 0; // Reset 325 } 326 // Found the end of the term... add the 327 // fuzzy search indicator 328 // Nor outside the scope of parentheses 329 else if (Character.isWhitespace(c) || c == ')') { 330 ///ystem.err.println("Yahoo! Found fuzzy term."); 331 mutable_query_string.insert(o, '~' + fuzziness); 371 332 o++; 372 } 373 // If we were in the state of looking for the end of a 374 // term - then we just found it! 
375 if (s == 3) 376 { 377 mutable_query_string.append('~' + fuzziness); 378 } 379 // Reparse the query 380 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix); 381 query = query_parser.parse(mutable_query_string.toString() + query_suffix); 382 } 383 else 384 { 385 query = query_parser.parse(query_prefix + query_suffix); 386 } 387 333 s = 0; // Reset 334 } 335 } 336 o++; 337 } 338 // If we were in the state of looking for the end of a 339 // term - then we just found it! 340 if (s == 3) { 341 mutable_query_string.append('~' + fuzziness); 342 } 343 // Reparse the query 344 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix); 345 query = query_parser.parse(mutable_query_string.toString() + query_suffix); 346 } 347 else { 348 query = query_parser.parse(query_prefix + query_suffix); 349 } 350 388 351 return query; 389 352 } … … 398 361 Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*"); 399 362 Matcher matcher = pattern.matcher(filter_string); 400 if (matcher.matches()) 401 { 363 if (matcher.matches()) { 402 364 String field_name = matcher.group(1); 403 365 boolean include_lower = matcher.group(2).equals("["); … … 406 368 boolean include_upper = matcher.group(5).equals("]"); 407 369 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 408 } 409 else 410 { 370 } 371 else { 411 372 System.err.println("Error: Could not understand filter string \"" + filter_string + "\""); 412 373 } 413 374 return result; 414 375 }
Note: See TracChangeset for help on using the changeset viewer.