Changeset 12415 for trunk/indexers/lucene-gs
- Timestamp: 2006-08-08T10:11:06+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r12408 r12415 90 90 91 91 BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); 92 while (true) 93 { 94 // Read the query from STDIN 95 String query_string = in.readLine(); 96 if (query_string == null || query_string.length() == -1) 97 { 98 break; 99 } 100 ///ystem.err.println("**** query = " + query_string); 101 102 Query query_including_stop_words = query_parser_no_stop_words.parse(query_string); 103 query_including_stop_words = query_including_stop_words.rewrite(reader); 104 105 // Split query string into the search terms and the filter terms 106 // * The first +(...) term contains the search terms so count 107 // up '(' and stop when we finish matching ')' 108 int offset = 0; 109 int paren_count = 0; 110 boolean seen_paren = false; 111 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) 112 { 113 if (query_string.charAt(offset) == '(') 114 { 115 paren_count++; 116 seen_paren = true; 117 } 118 if (query_string.charAt(offset) == ')') 119 { 120 paren_count--; 121 } 122 offset++; 123 } 124 String query_prefix = query_string.substring(0, offset); 125 String query_suffix = query_string.substring(offset); 126 127 ///ystem.err.println("Prefix: " + query_prefix); 128 ///ystem.err.println("Suffix: " + query_suffix); 129 130 Query query = query_parser.parse(query_prefix); 92 while (true) { 93 // Read the query from STDIN 94 String query_string = in.readLine(); 95 if (query_string == null || query_string.length() == -1) { 96 break; 97 } 98 ///ystem.err.println("**** query = " + query_string); 99 100 Query query_including_stop_words = query_parser_no_stop_words.parse(query_string); 101 query_including_stop_words = query_including_stop_words.rewrite(reader); 102 103 Query query = parseQuery(reader, query_parser, query_string, fuzzy); 131 104 query = query.rewrite(reader); 132 133 // If this is a fuzzy search, then we need to add the fuzzy134 // flag to each of the query terms135 if (fuzzy && query.toString().length() > 
0)136 {137 // Revert the query to a string138 ///ystem.err.println("Rewritten query: " + query.toString());139 // Search through the string for TX:<term> query terms140 // and append the ~ operator. Not that this search will141 // not change phrase searches (TX:"<term> <term>") as142 // fuzzy searching is not possible for these entries.143 // Yahoo! Time for a state machine!144 StringBuffer mutable_query_string = new StringBuffer(query.toString());145 int o = 0; // Offset146 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:147 int s = 0; // State148 while(o < mutable_query_string.length())149 {150 char c = mutable_query_string.charAt(o);151 if (s == 0 && c == 'T')152 {153 ///ystem.err.println("Found T!");154 s = 1;155 }156 else if (s == 1)157 {158 if (c == 'X')159 {160 ///ystem.err.println("Found X!");161 s = 2;162 }163 else164 {165 s = 0; // Reset166 }167 }168 else if (s == 2)169 {170 if (c == ':')171 {172 ///ystem.err.println("Found TX:!");173 s = 3;174 }175 else176 {177 s = 0; // Reset178 }179 }180 else if (s == 3)181 {182 // Don't process phrases183 if (c == '"')184 {185 ///ystem.err.println("Stupid phrase...");186 s = 0; // Reset187 }188 // Found the end of the term... add the189 // fuzzy search indicator190 // Nor outside the scope of parentheses191 else if (Character.isWhitespace(c) || c == ')')192 {193 ///ystem.err.println("Yahoo! 
Found fuzzy term.");194 mutable_query_string.insert(o, '~');195 o++;196 s = 0; // Reset197 }198 }199 o++;200 }201 // If we were in the state of looking for the end of a202 // term - then we just found it!203 if (s == 3)204 {205 mutable_query_string.append('~');206 }207 // Reparse the query208 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);209 query = query_parser.parse(mutable_query_string.toString() + query_suffix);210 // And rewrite again211 query = query.rewrite(reader);212 ///ystem.err.println("Rewritten Fuzzy query: " + query.toString());213 }214 else215 {216 query = query_parser.parse(query_prefix + query_suffix);217 query = query.rewrite(reader);218 }219 105 220 106 // Perform the query … … 228 114 System.out.println("<ResultSet>"); 229 115 System.out.println(" <QueryString>" + query_string + "</QueryString>"); 116 230 117 // Return the list of expanded query terms and their frequencies 231 118 HashMap term_counts = new HashMap(); … … 233 120 HashSet terms = new HashSet(); 234 121 query.extractTerms(terms); 235 //System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>");236 122 Iterator iter = terms.iterator(); 237 123 while (iter.hasNext()) … … 347 233 return null; 348 234 } 235 236 237 private static Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, boolean fuzzy) 238 throws java.io.IOException, org.apache.lucene.queryParser.ParseException 239 { 240 // Split query string into the search terms and the filter terms 241 // * The first +(...) 
term contains the search terms so count 242 // up '(' and stop when we finish matching ')' 243 int offset = 0; 244 int paren_count = 0; 245 boolean seen_paren = false; 246 while (offset < query_string.length() && (!seen_paren || paren_count > 0)) 247 { 248 if (query_string.charAt(offset) == '(') 249 { 250 paren_count++; 251 seen_paren = true; 252 } 253 if (query_string.charAt(offset) == ')') 254 { 255 paren_count--; 256 } 257 offset++; 258 } 259 String query_prefix = query_string.substring(0, offset); 260 String query_suffix = query_string.substring(offset); 261 262 ///ystem.err.println("Prefix: " + query_prefix); 263 ///ystem.err.println("Suffix: " + query_suffix); 264 265 Query query = query_parser.parse(query_prefix); 266 query = query.rewrite(reader); 267 268 // If this is a fuzzy search, then we need to add the fuzzy 269 // flag to each of the query terms 270 if (fuzzy && query.toString().length() > 0) 271 { 272 // Revert the query to a string 273 System.err.println("Rewritten query: " + query.toString()); 274 // Search through the string for TX:<term> query terms 275 // and append the ~ operator. Not that this search will 276 // not change phrase searches (TX:"<term> <term>") as 277 // fuzzy searching is not possible for these entries. 278 // Yahoo! Time for a state machine! 
279 StringBuffer mutable_query_string = new StringBuffer(query.toString()); 280 int o = 0; // Offset 281 // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX: 282 int s = 0; // State 283 while(o < mutable_query_string.length()) 284 { 285 char c = mutable_query_string.charAt(o); 286 if (s == 0 && c == 'T') 287 { 288 ///ystem.err.println("Found T!"); 289 s = 1; 290 } 291 else if (s == 1) 292 { 293 if (c == 'X') 294 { 295 ///ystem.err.println("Found X!"); 296 s = 2; 297 } 298 else 299 { 300 s = 0; // Reset 301 } 302 } 303 else if (s == 2) 304 { 305 if (c == ':') 306 { 307 ///ystem.err.println("Found TX:!"); 308 s = 3; 309 } 310 else 311 { 312 s = 0; // Reset 313 } 314 } 315 else if (s == 3) 316 { 317 // Don't process phrases 318 if (c == '"') 319 { 320 ///ystem.err.println("Stupid phrase..."); 321 s = 0; // Reset 322 } 323 // Found the end of the term... add the 324 // fuzzy search indicator 325 // Nor outside the scope of parentheses 326 else if (Character.isWhitespace(c) || c == ')') 327 { 328 ///ystem.err.println("Yahoo! Found fuzzy term."); 329 mutable_query_string.insert(o, '~'); 330 o++; 331 s = 0; // Reset 332 } 333 } 334 o++; 335 } 336 // If we were in the state of looking for the end of a 337 // term - then we just found it! 338 if (s == 3) 339 { 340 mutable_query_string.append('~'); 341 } 342 // Reparse the query 343 ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix); 344 query = query_parser.parse(mutable_query_string.toString() + query_suffix); 345 } 346 else 347 { 348 query = query_parser.parse(query_prefix + query_suffix); 349 } 350 351 return query; 352 } 349 353 }
Note: See TracChangeset for help on using the changeset viewer.