source: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneQuery.java@26157

Last change on this file since 26157 was 26157, checked in by ak19, 12 years ago

A much better way to ensure searches with wildcards get expanded to query terms, as in GS2 (which uses an older version of lucene, 2.3.2). Wildcard searches now work as before at both document-level and section-level. If a BooleanQuery.TooManyClauses exception is thrown, the query is rewritten once more using other rewrite methods, so that results are still returned (instead of the exception forcing a report of 0 documents).

File size: 23.6 KB
/**********************************************************************
 *
 * GS2LuceneQuery.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.LuceneWrapper3;


import java.io.*;
import java.util.*;
import java.util.regex.*;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery; // for the TooManyClauses exception
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiTermQuery.ConstantScoreAutoRewrite;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class GS2LuceneQuery extends SharedSoleneQuery
{
    protected String full_indexdir = "";

    protected Sort sorter = new Sort();
    protected Filter filter = null;

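    // Version.LUCENE_24 makes the analyzers and query parsers match the
    // settings and bugs of Lucene's 2.4 release (presumably kept so behaviour
    // stays close to the lucene-2.3.2-based GS2 wrapper mentioned below)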
    protected static Version matchVersion = Version.LUCENE_24;

    protected QueryParser query_parser = null;
    protected QueryParser query_parser_no_stop_words = null;
    protected Searcher searcher = null;
    protected IndexReader reader = null;

    public GS2LuceneQuery() {
        super();

        // Create one query parser with the standard set of stop words, and one with none
        query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
        query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
    }


    public boolean initialise() {

        if (!super.initialise()) {
            return false;
        }

        if (full_indexdir == null || full_indexdir.length() == 0) {
            utf8out.println("Index directory is not specified");
            utf8out.flush();
            return false;
        }

        try {
            Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
            searcher = new IndexSearcher(full_indexdir_dir, true);
            reader = ((IndexSearcher) searcher).getIndexReader();
        }
        catch (IOException exception) {
            exception.printStackTrace();
            return false;
        }
        return true;
    }

    public void setIndexDir(String full_indexdir) {
        this.full_indexdir = full_indexdir;
    }

    public void setSortField(String sort_field) {
        super.setSortField(sort_field);

        if (sort_field == null) {
            this.sorter = new Sort();
        } else {
            this.sorter = new Sort(new SortField(sort_field, SortField.STRING)); // **** can do better than this?!?
        }
    }

    public void setFilterString(String filter_string) {
        super.setFilterString(filter_string);
        this.filter = parseFilterString(filter_string);
    }

    public Filter getFilter() {
        return this.filter;
    }


    public LuceneQueryResult runQuery(String query_string) {

        if (query_string == null || query_string.equals("")) {
            utf8out.println("No query string specified");
            utf8out.flush();
            return null;
        }

        LuceneQueryResult lucene_query_result = new LuceneQueryResult();
        lucene_query_result.clear();

        try {
            Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
            query_including_stop_words = query_including_stop_words.rewrite(reader);

            // System.err.println("********* query_string " + query_string + "****");

            Query query = parseQuery(reader, query_parser, query_string, fuzziness);

            // GS2's LuceneWrapper uses lucene-2.3.2. GS3's LuceneWrapper3 works with lucene-3.3.0.
            // This change in the lucene core library for GS3 (present since after version 2.4.1) had the
            // side-effect that searching on "econom*" didn't display what terms it was searching for,
            // whereas it had done so in GS2.

            // The details of this problem and its current solution are explained in the ticket
            // http://trac.greenstone.org/ticket/845

            // We need to change the settings for the rewriteMethod in order to get searches on wildcards
            // to produce search terms again when the query gets rewritten.

            // We try, in order:
            // 1. RewriteMethod set to BooleanQuery, to get it working as in GS2 (which uses lucene-2.3.2):
            //    it will expand wildcard searches to their terms when searching at both section AND doc level.
            //    If that throws a TooManyClauses exception (as when searching for "a*" over the lucene demo collection),
            // 2. then try a custom rewriteMethod which sets termCountCutoff=350 and a docCountPercent cutoff of 100%.
            //    If that also throws a TooManyClauses exception (which could perhaps happen if the collection
            //    has a huge number of docs),
            // 3. then try the default apache rewriteMethod with its optimum defaults of
            //    termCountCutoff=350 and docCountPercent cutoff=0.1%.
            //    See http://lucene.apache.org/core/3_6_1/api/core/org/apache/lucene/search/MultiTermQuery.html

            if (query instanceof MultiTermQuery) {
                MultiTermQuery multiTermQuery = (MultiTermQuery)query;
                multiTermQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
                // less CPU intensive than MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE
            }

            try {
                query = query.rewrite(reader);
            }
            catch (BooleanQuery.TooManyClauses clauseException) {
                // Example test case: try searching the lucene demo collection for "a*"
                // and you'll hit this exception

                lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);

                if (query instanceof MultiTermQuery) {

                    // CustomRewriteMethod: setting the docCountPercent cutoff to a custom 100%.
                    // This will at least expand the query to its terms when searching with wildcards at section-level
                    // (though it doesn't seem to work for doc-level searches, no matter what the cutoffs are set to).

                    MultiTermQuery.ConstantScoreAutoRewrite customRewriteMethod = new MultiTermQuery.ConstantScoreAutoRewrite();
                    customRewriteMethod.setDocCountPercent(100.0);
                    customRewriteMethod.setTermCountCutoff(350); // same as the default

                    MultiTermQuery multiTermQuery = (MultiTermQuery)query;
                    multiTermQuery.setRewriteMethod(customRewriteMethod);
                    try {
                        query = query.rewrite(reader);
                    }
                    catch (BooleanQuery.TooManyClauses clauseExceptionAgain) {

                        // do what the code originally did: use the default rewriteMethod, which
                        // uses a default docCountPercent=0.1 (%) and termCountCutoff=350

                        multiTermQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
                        query = query.rewrite(reader);
                    }
                }
            }

            // Get the list of expanded query terms and their frequencies:
            // the number of docs matching each term, and its total frequency
            HashSet terms = new HashSet();
            query.extractTerms(terms);

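            // maps a Lucene doc number -> the summed frequency, within that doc,
            // of all the query terms that occur in it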
            HashMap doc_term_freq_map = new HashMap();

            Iterator iter = terms.iterator();
            while (iter.hasNext()) {

                Term term = (Term) iter.next();

                // Get the term frequency over all the documents
                TermDocs term_docs = reader.termDocs(term);
                int term_freq = 0;
                int match_docs = 0;
                while (term_docs.next()) {
                    if (term_docs.freq() != 0) {
                        term_freq += term_docs.freq();
                        match_docs++;

                        // Calculate the document-level term frequency as well
                        Integer lucene_doc_num_obj = new Integer(term_docs.doc());
                        int doc_term_freq = 0;
                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj)) {
                            doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
                        }
                        doc_term_freq += term_docs.freq();

                        doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
                    }
                }

                // Create a term
                lucene_query_result.addTerm(term.text(), term.field(), match_docs, term_freq);
            }

            // Get the list of stop words removed from the query
            HashSet terms_including_stop_words = new HashSet();
            query_including_stop_words.extractTerms(terms_including_stop_words);
            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
            while (terms_including_stop_words_iter.hasNext()) {
                Term term = (Term) terms_including_stop_words_iter.next();
                if (!terms.contains(term)) {
                    lucene_query_result.addStopWord(term.text());
                }
            }

            // do the query
            // Simple case for getting all the matching documents
            if (end_results == Integer.MAX_VALUE) {
                // Perform the query (filter and sorter may be null)
                TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
                lucene_query_result.setTotalDocs(hits.totalHits);

                // Output the matching documents
                lucene_query_result.setStartResults(start_results);
                lucene_query_result.setEndResults(hits.totalHits);

                for (int i = start_results; i <= hits.totalHits; i++) {
                    int lucene_doc_num = hits.scoreDocs[i - 1].doc;
                    Document doc = reader.document(lucene_doc_num);
                    int doc_term_freq = 0;
                    Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                    if (doc_term_freq_object != null) {
                        doc_term_freq = doc_term_freq_object.intValue();
                    }
                    lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
                }
            }

            // Slightly more complicated case for returning a subset of the matching documents
            else {
                // Perform the query (filter may be null)
                TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
                lucene_query_result.setTotalDocs(hits.totalHits);

                lucene_query_result.setStartResults(start_results);
                lucene_query_result.setEndResults(end_results < hits.scoreDocs.length ? end_results : hits.scoreDocs.length);

                // Output the matching documents
                for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
                    int lucene_doc_num = hits.scoreDocs[i - 1].doc;
                    Document doc = reader.document(lucene_doc_num);
                    int doc_term_freq = 0;
                    Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                    if (doc_term_freq_object != null) {
                        doc_term_freq = doc_term_freq_object.intValue();
                    }
                    lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
                }
            }
        }

        catch (ParseException parse_exception) {
            lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR);
        }
        catch (BooleanQuery.TooManyClauses too_many_clauses_exception) {
            lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);
        }
        catch (IOException exception) {
            lucene_query_result.setError(LuceneQueryResult.IO_ERROR);
            exception.printStackTrace();
        }
        catch (Exception exception) {
            lucene_query_result.setError(LuceneQueryResult.OTHER_ERROR);
            exception.printStackTrace();
        }
        return lucene_query_result;
    }

    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
        super.setDefaultConjunctionOperator(default_conjunction_operator);

        if (default_conjunction_operator.equals("AND")) {
            query_parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(QueryParser.AND_OPERATOR);
        } else { // default is OR
            query_parser.setDefaultOperator(QueryParser.OR_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(QueryParser.OR_OPERATOR);
        }
    }


    public void cleanUp() {
        super.cleanUp();
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (IOException exception) {
            exception.printStackTrace();
        }
    }


    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
        throws java.io.IOException, org.apache.lucene.queryParser.ParseException
    {
        // Split the query string into the search terms and the filter terms
        // * The first +(...) term contains the search terms, so count
        //   up '(' and stop when we finish matching ')'
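        // e.g. for the (hypothetical) query "+(TX:(economy)) +ZZ:[a TO z]" the
        // prefix would be "+(TX:(economy))" and the suffix " +ZZ:[a TO z]"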
        int offset = 0;
        int paren_count = 0;
        boolean seen_paren = false;
        while (offset < query_string.length() && (!seen_paren || paren_count > 0)) {
            if (query_string.charAt(offset) == '(') {
                paren_count++;
                seen_paren = true;
            }
            if (query_string.charAt(offset) == ')') {
                paren_count--;
            }
            offset++;
        }
        String query_prefix = query_string.substring(0, offset);
        String query_suffix = query_string.substring(offset);

        ///ystem.err.println("Prefix: " + query_prefix);
        ///ystem.err.println("Suffix: " + query_suffix);

        Query query = query_parser.parse(query_prefix);
        query = query.rewrite(reader);

        // If this is a fuzzy search, then we need to add the fuzzy
        // flag to each of the query terms
        if (fuzziness != null && query.toString().length() > 0) {

            // Convert the query back to a string
            System.err.println("Rewritten query: " + query.toString());
            // Search through the string for TX:<term> query terms
            // and append the ~ operator. Note that this search will
            // not change phrase searches (TX:"<term> <term>") as
            // fuzzy searching is not possible for these entries.
            // Yahoo! Time for a state machine!
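            // e.g. with a (hypothetical) fuzziness of "0.7", "TX:economy TX:policy"
            // becomes "TX:economy~0.7 TX:policy~0.7", while phrases are left alone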
            StringBuffer mutable_query_string = new StringBuffer(query.toString());
            int o = 0; // Offset
            // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:
            int s = 0; // State
            while (o < mutable_query_string.length()) {
                char c = mutable_query_string.charAt(o);
                if (s == 0 && c == TEXTFIELD.charAt(0)) {
                    ///ystem.err.println("Found T!");
                    s = 1;
                }
                else if (s == 1) {
                    if (c == TEXTFIELD.charAt(1)) {
                        ///ystem.err.println("Found X!");
                        s = 2;
                    }
                    else {
                        s = 0; // Reset
                    }
                }
                else if (s == 2) {
                    if (c == ':') {
                        ///ystem.err.println("Found TX:!");
                        s = 3;
                    }
                    else {
                        s = 0; // Reset
                    }
                }
                else if (s == 3) {
                    // Don't process phrases
                    if (c == '"') {
                        ///ystem.err.println("Stupid phrase...");
                        s = 0; // Reset
                    }
                    // Found the end of the term (whitespace, or a ')' closing
                    // a parenthesised group)... add the fuzzy search indicator
                    else if (Character.isWhitespace(c) || c == ')') {
                        ///ystem.err.println("Yahoo! Found fuzzy term.");
                        mutable_query_string.insert(o, '~' + fuzziness);
                        o++;
                        s = 0; // Reset
                    }
                }
                o++;
            }
            // If we were in the state of looking for the end of a
            // term - then we just found it!
            if (s == 3) {
                mutable_query_string.append('~' + fuzziness);
            }
            // Reparse the query
            ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);
            query = query_parser.parse(mutable_query_string.toString() + query_suffix);
        }
        else {
            query = query_parser.parse(query_prefix + query_suffix);
        }

        return query;
    }

    protected Filter parseFilterString(String filter_string)
    {
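        // Accepts a Lucene-style range filter such as "+ZZ:[19900101 TO 20000101]",
        // where '[' and ']' give inclusive bounds and '{' and '}' exclusive ones;
        // anything else is reported as an error and no filter is applied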
        Filter result = null;
        Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*");
        Matcher matcher = pattern.matcher(filter_string);
        if (matcher.matches()) {
            String field_name = matcher.group(1);
            boolean include_lower = matcher.group(2).equals("[");
            String lower_term = matcher.group(3);
            String upper_term = matcher.group(4);
            boolean include_upper = matcher.group(5).equals("]");
            result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
        }
        else {
            System.err.println("Error: Could not understand filter string \"" + filter_string + "\"");
        }
        return result;
    }


    /** command line program and auxiliary methods */

    // Fairly self-explanatory I should hope
    static protected boolean query_result_caching_enabled = false;

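    // Example invocation (hypothetical index path):
    //   java org.greenstone.LuceneWrapper3.GS2LuceneQuery /path/to/index/didx -dco AND -startresults 1 -endresults 10 "econom*"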
    static public void main (String args[])
    {
        if (args.length == 0) {
            System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");
            return;
        }

        try {
            String index_directory = args[0];

            GS2LuceneQuery queryer = new GS2LuceneQuery();
            queryer.setIndexDir(index_directory);

            // Prepare the index cache directory, if query result caching is enabled
            if (query_result_caching_enabled) {
                // Make the index cache directory if it doesn't already exist
                File index_cache_directory = new File(index_directory, "cache");
                if (!index_cache_directory.exists()) {
                    index_cache_directory.mkdir();
                }

                // Disable caching if the index cache directory isn't available
                if (!index_cache_directory.exists() || !index_cache_directory.isDirectory()) {
                    query_result_caching_enabled = false;
                }
            }

            String query_string = null;

            // Parse the command-line arguments
            for (int i = 1; i < args.length; i++) {
                if (args[i].equals("-sort")) {
                    i++;
                    queryer.setSortField(args[i]);
                }
                else if (args[i].equals("-filter")) {
                    i++;
                    queryer.setFilterString(args[i]);
                }
                else if (args[i].equals("-dco")) {
                    i++;
                    queryer.setDefaultConjunctionOperator(args[i]);
                }
                else if (args[i].equals("-fuzziness")) {
                    i++;
                    queryer.setFuzziness(args[i]);
                }
                else if (args[i].equals("-startresults")) {
                    i++;
                    if (args[i].matches("\\d+")) {
                        queryer.setStartResults(Integer.parseInt(args[i]));
                    }
                }
                else if (args[i].equals("-endresults")) {
                    i++;
                    if (args[i].matches("\\d+")) {
                        queryer.setEndResults(Integer.parseInt(args[i]));
                    }
                }
                else {
                    query_string = args[i];
                }
            }

            if (!queryer.initialise()) {
                return;
            }

            // The query string has been specified as a command-line argument
            if (query_string != null) {
                runQueryCaching(index_directory, queryer, query_string);
            }

            // Read queries from STDIN
            else {
                BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
                while (true) {
                    // Read the query from STDIN; stop at end of input or on an empty line
                    query_string = in.readLine();
                    if (query_string == null || query_string.length() == 0) {
                        break;
                    }

                    runQueryCaching(index_directory, queryer, query_string);
                }
            }
            queryer.cleanUp();
        }
        catch (IOException exception) {
            exception.printStackTrace();
        }
    }

    protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
        throws IOException
    {
        StringBuffer query_results_xml = new StringBuffer();

        // Check if this query result has been cached from a previous search (if caching is enabled)
        File query_result_cache_file = null;
        if (query_result_caching_enabled) {
            // Generate the cache file name from the query options
            String query_result_cache_file_name = query_string + "-";
            String fuzziness = queryer.getFuzziness();
            query_result_cache_file_name += ((fuzziness != null) ? fuzziness : "") + "-";
            String filter_string = queryer.getFilterString();
            query_result_cache_file_name += ((filter_string != null) ? filter_string : "") + "-";
            String sort_string = queryer.getSortField();
            query_result_cache_file_name += ((sort_string != null) ? sort_string : "") + "-";
            String default_conjunction_operator = queryer.getDefaultConjunctionOperator();
            query_result_cache_file_name += default_conjunction_operator + "-";
            int start_results = queryer.getStartResults();
            int end_results = queryer.getEndResults();
            query_result_cache_file_name += start_results + "-" + end_results;
            query_result_cache_file_name = fileSafe(query_result_cache_file_name);
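            // e.g. (hypothetical values) the query "economy" with no fuzziness, filter
            // or sort field, operator OR and results 1..10 gives "economy----OR-1-10"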

            // If the query result cache file exists, just return its contents and we're done
            File index_cache_directory = new File(index_directory, "cache");
            query_result_cache_file = new File(index_cache_directory, query_result_cache_file_name);
            if (query_result_cache_file.exists() && query_result_cache_file.isFile()) {
                FileInputStream fis = new FileInputStream(query_result_cache_file);
                InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
                BufferedReader buffered_reader = new BufferedReader(isr);
                String line = "";
                while ((line = buffered_reader.readLine()) != null) {
                    query_results_xml.append(line + "\n");
                }
                String query_results_xml_string = query_results_xml.toString();
                query_results_xml_string = query_results_xml_string.replaceFirst("cached=\"false\"", "cached=\"true\"");

                utf8out.print(query_results_xml_string);
                utf8out.flush();

                return;
            }
        }

        // not cached
        query_results_xml.append("<ResultSet cached=\"false\">\n");
        query_results_xml.append("<QueryString>" + LuceneQueryResult.xmlSafe(query_string) + "</QueryString>\n");
        Filter filter = queryer.getFilter();
        if (filter != null) {
            query_results_xml.append("<FilterString>" + filter.toString() + "</FilterString>\n");
        }

        LuceneQueryResult query_result = queryer.runQuery(query_string);
        if (query_result == null) {
            System.err.println("Couldn't run the query");
            return;
        }

        if (query_result.getError() != LuceneQueryResult.NO_ERROR) {
            query_results_xml.append("<Error type=\"" + query_result.getErrorString() + "\" />\n");
        } else {
            query_results_xml.append(query_result.getXMLString());
        }
        query_results_xml.append("</ResultSet>\n");

        utf8out.print(query_results_xml);
        utf8out.flush();

        // Cache this query result, if desired
        if (query_result_caching_enabled) {
            // Catch any exceptions thrown trying to write the query result cache file and warn about them, but don't
            // bother with the full stack trace. It won't affect the functionality if we can't write some cache
            // files; it will just affect the speed of subsequent requests.
            // Example exceptions are "permission denied" errors, or "filename too long" errors (the filter string
            // can get very long in some collections)
            try {
                FileWriter query_result_cache_file_writer = new FileWriter(query_result_cache_file);
                query_result_cache_file_writer.write(query_results_xml.toString());
                query_result_cache_file_writer.close();
            }
            catch (Exception exception) {
                System.err.println("Warning: Exception occurred trying to write query result cache file (" + exception + ")");
            }
        }
    }

    protected static String fileSafe(String text)
    {
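        // Keep alphanumerics and '-'; encode every other character as '%' followed
        // by its numeric character code, e.g. "econom*" becomes "econom%42"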
        StringBuffer file_safe_text = new StringBuffer();
        for (int i = 0; i < text.length(); i++) {
            char character = text.charAt(i);
            if ((character >= 'A' && character <= 'Z') || (character >= 'a' && character <= 'z') || (character >= '0' && character <= '9') || character == '-') {
                file_safe_text.append(character);
            }
            else {
                file_safe_text.append('%');
                file_safe_text.append((int) character);
            }
        }
        return file_safe_text.toString();
    }


}