Changeset 12983
- Timestamp: 2006-10-03T16:55:27+13:00 (18 years ago)
- Location: trunk
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java
r12981 r12983 111 111 break; 112 112 } 113 System.out.println("<ResultSet>"); 114 System.out.println(" <QueryString>" + xmlSafe(query_string) + "</QueryString>"); 115 if (filter != null) { 116 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 117 } 118 119 try { 120 Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness); 121 query_including_stop_words = query_including_stop_words.rewrite(reader); 122 123 Query query = parseQuery(reader, query_parser, query_string, fuzziness); 124 query = query.rewrite(reader); 125 126 // Return the list of expanded query terms and their frequencies 127 HashSet terms = new HashSet(); 128 query.extractTerms(terms); 129 Iterator term_iterator = terms.iterator(); 130 System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>"); 131 while (term_iterator.hasNext()) { 132 Term term = (Term) term_iterator.next(); 133 134 // Get the term frequency over all the documents 135 TermDocs term_docs = reader.termDocs(term); 136 int term_freq = term_docs.freq(); 137 while (term_docs.next()) { 138 term_freq += term_docs.freq(); 139 } 140 141 // If you wanted to limit this to just text terms add 142 // something like this: 143 // if (term.field().equals(TEXTFIELD)) 144 System.out.println(" <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />"); 145 } 146 147 // Return the list of stop words removed from the query 148 HashSet terms_including_stop_words = new HashSet(); 149 query_including_stop_words.extractTerms(terms_including_stop_words); 150 Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator(); 151 while (terms_including_stop_words_iter.hasNext()) { 152 Term term = (Term) terms_including_stop_words_iter.next(); 153 if (!terms.contains(term)) { 154 System.out.println(" <StopWord value=\"" + term.text() + "\"/>"); 155 } 156 } 157 158 // Simple case for getting all the matching documents 159 
if (end_results == Integer.MAX_VALUE) { 160 // Perform the query (filter and sorter may be null) 161 Hits hits = searcher.search(query, filter, sorter); 162 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 163 164 // Output the matching documents 165 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 166 System.out.println(" <EndsResults num=\"" + hits.length() + "\" />"); 167 for (int i = start_results; i <= hits.length(); i++) { 168 Document doc = hits.doc(i - 1); 169 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 170 } 171 } 172 173 // Slightly more complicated case for returning a subset of the matching documents 174 else { 175 // Perform the query (filter may be null) 176 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 177 System.out.println(" <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>"); 178 179 // Output the matching documents 180 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 181 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 182 for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) { 183 Document doc = reader.document(hits.scoreDocs[i - 1].doc); 184 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 185 } 186 } 187 } 188 catch (ParseException parse_exception) { 189 System.out.println(" <Error type=\"PARSE_EXCEPTION\"/>"); 190 } 191 catch (TooManyClauses too_many_clauses_exception) { 192 System.out.println(" <Error type=\"TOO_MANY_CLAUSES\"/>"); 193 } 194 195 System.out.println("</ResultSet>"); 196 } 197 198 searcher.close(); 113 114 runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string); 115 } 199 116 } 200 117 catch (IOException exception) { … … 202 119 } 203 120 } 121 122 123 private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string) 124 throws 
IOException 125 { 126 try { 127 Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness); 128 query_including_stop_words = query_including_stop_words.rewrite(reader); 129 130 Query query = parseQuery(reader, query_parser, query_string, fuzziness); 131 query = query.rewrite(reader); 132 133 // Return the list of expanded query terms and their frequencies 134 HashSet terms = new HashSet(); 135 query.extractTerms(terms); 136 Iterator term_iterator = terms.iterator(); 137 System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>"); 138 while (term_iterator.hasNext()) { 139 Term term = (Term) term_iterator.next(); 140 141 // Get the term frequency over all the documents 142 TermDocs term_docs = reader.termDocs(term); 143 int term_freq = term_docs.freq(); 144 while (term_docs.next()) { 145 term_freq += term_docs.freq(); 146 } 147 148 // If you wanted to limit this to just text terms add 149 // something like this: 150 // if (term.field().equals(TEXTFIELD)) 151 System.out.println(" <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />"); 152 } 153 154 // Return the list of stop words removed from the query 155 HashSet terms_including_stop_words = new HashSet(); 156 query_including_stop_words.extractTerms(terms_including_stop_words); 157 Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator(); 158 while (terms_including_stop_words_iter.hasNext()) { 159 Term term = (Term) terms_including_stop_words_iter.next(); 160 if (!terms.contains(term)) { 161 System.out.println(" <StopWord value=\"" + term.text() + "\"/>"); 162 } 163 } 164 165 // Simple case for getting all the matching documents 166 if (end_results == Integer.MAX_VALUE) { 167 // Perform the query (filter and sorter may be null) 168 Hits hits = searcher.search(query, filter, sorter); 169 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 170 171 // Output the matching 
documents 172 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 173 System.out.println(" <EndsResults num=\"" + hits.length() + "\" />"); 174 for (int i = start_results; i <= hits.length(); i++) { 175 Document doc = hits.doc(i - 1); 176 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 177 } 178 } 179 180 // Slightly more complicated case for returning a subset of the matching documents 181 else { 182 // Perform the query (filter may be null) 183 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 184 System.out.println(" <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>"); 185 186 // Output the matching documents 187 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 188 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 189 for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) { 190 Document doc = reader.document(hits.scoreDocs[i - 1].doc); 191 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 192 } 193 } 194 } 195 catch (ParseException parse_exception) { 196 System.out.println(" <Error type=\"PARSE_EXCEPTION\"/>"); 197 } 198 catch (TooManyClauses too_many_clauses_exception) { 199 System.out.println(" <Error type=\"TOO_MANY_CLAUSES\"/>"); 200 } 201 202 System.out.println("</ResultSet>"); 203 } 204 204 205 205 206 private static String xmlSafe(String text) { -
trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r12981 r12983 111 111 break; 112 112 } 113 System.out.println("<ResultSet>"); 114 System.out.println(" <QueryString>" + xmlSafe(query_string) + "</QueryString>"); 115 if (filter != null) { 116 System.out.println(" <FilterString>" + filter.toString() + "</FilterString>"); 117 } 118 119 try { 120 Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness); 121 query_including_stop_words = query_including_stop_words.rewrite(reader); 122 123 Query query = parseQuery(reader, query_parser, query_string, fuzziness); 124 query = query.rewrite(reader); 125 126 // Return the list of expanded query terms and their frequencies 127 HashSet terms = new HashSet(); 128 query.extractTerms(terms); 129 Iterator term_iterator = terms.iterator(); 130 System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>"); 131 while (term_iterator.hasNext()) { 132 Term term = (Term) term_iterator.next(); 133 134 // Get the term frequency over all the documents 135 TermDocs term_docs = reader.termDocs(term); 136 int term_freq = term_docs.freq(); 137 while (term_docs.next()) { 138 term_freq += term_docs.freq(); 139 } 140 141 // If you wanted to limit this to just text terms add 142 // something like this: 143 // if (term.field().equals(TEXTFIELD)) 144 System.out.println(" <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />"); 145 } 146 147 // Return the list of stop words removed from the query 148 HashSet terms_including_stop_words = new HashSet(); 149 query_including_stop_words.extractTerms(terms_including_stop_words); 150 Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator(); 151 while (terms_including_stop_words_iter.hasNext()) { 152 Term term = (Term) terms_including_stop_words_iter.next(); 153 if (!terms.contains(term)) { 154 System.out.println(" <StopWord value=\"" + term.text() + "\"/>"); 155 } 156 } 157 158 // Simple case for getting all the matching documents 159 
if (end_results == Integer.MAX_VALUE) { 160 // Perform the query (filter and sorter may be null) 161 Hits hits = searcher.search(query, filter, sorter); 162 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 163 164 // Output the matching documents 165 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 166 System.out.println(" <EndsResults num=\"" + hits.length() + "\" />"); 167 for (int i = start_results; i <= hits.length(); i++) { 168 Document doc = hits.doc(i - 1); 169 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 170 } 171 } 172 173 // Slightly more complicated case for returning a subset of the matching documents 174 else { 175 // Perform the query (filter may be null) 176 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 177 System.out.println(" <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>"); 178 179 // Output the matching documents 180 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 181 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 182 for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) { 183 Document doc = reader.document(hits.scoreDocs[i - 1].doc); 184 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 185 } 186 } 187 } 188 catch (ParseException parse_exception) { 189 System.out.println(" <Error type=\"PARSE_EXCEPTION\"/>"); 190 } 191 catch (TooManyClauses too_many_clauses_exception) { 192 System.out.println(" <Error type=\"TOO_MANY_CLAUSES\"/>"); 193 } 194 195 System.out.println("</ResultSet>"); 196 } 197 198 searcher.close(); 113 114 runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string); 115 } 199 116 } 200 117 catch (IOException exception) { … … 202 119 } 203 120 } 121 122 123 private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string) 124 throws 
IOException 125 { 126 try { 127 Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness); 128 query_including_stop_words = query_including_stop_words.rewrite(reader); 129 130 Query query = parseQuery(reader, query_parser, query_string, fuzziness); 131 query = query.rewrite(reader); 132 133 // Return the list of expanded query terms and their frequencies 134 HashSet terms = new HashSet(); 135 query.extractTerms(terms); 136 Iterator term_iterator = terms.iterator(); 137 System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>"); 138 while (term_iterator.hasNext()) { 139 Term term = (Term) term_iterator.next(); 140 141 // Get the term frequency over all the documents 142 TermDocs term_docs = reader.termDocs(term); 143 int term_freq = term_docs.freq(); 144 while (term_docs.next()) { 145 term_freq += term_docs.freq(); 146 } 147 148 // If you wanted to limit this to just text terms add 149 // something like this: 150 // if (term.field().equals(TEXTFIELD)) 151 System.out.println(" <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />"); 152 } 153 154 // Return the list of stop words removed from the query 155 HashSet terms_including_stop_words = new HashSet(); 156 query_including_stop_words.extractTerms(terms_including_stop_words); 157 Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator(); 158 while (terms_including_stop_words_iter.hasNext()) { 159 Term term = (Term) terms_including_stop_words_iter.next(); 160 if (!terms.contains(term)) { 161 System.out.println(" <StopWord value=\"" + term.text() + "\"/>"); 162 } 163 } 164 165 // Simple case for getting all the matching documents 166 if (end_results == Integer.MAX_VALUE) { 167 // Perform the query (filter and sorter may be null) 168 Hits hits = searcher.search(query, filter, sorter); 169 System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>"); 170 171 // Output the matching 
documents 172 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 173 System.out.println(" <EndsResults num=\"" + hits.length() + "\" />"); 174 for (int i = start_results; i <= hits.length(); i++) { 175 Document doc = hits.doc(i - 1); 176 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 177 } 178 } 179 180 // Slightly more complicated case for returning a subset of the matching documents 181 else { 182 // Perform the query (filter may be null) 183 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 184 System.out.println(" <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>"); 185 186 // Output the matching documents 187 System.out.println(" <StartResults num=\"" + start_results + "\" />"); 188 System.out.println(" <EndsResults num=\"" + end_results + "\" />"); 189 for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) { 190 Document doc = reader.document(hits.scoreDocs[i - 1].doc); 191 System.out.println(" <Match id=\"" + doc.get("nodeID") + "\" />"); 192 } 193 } 194 } 195 catch (ParseException parse_exception) { 196 System.out.println(" <Error type=\"PARSE_EXCEPTION\"/>"); 197 } 198 catch (TooManyClauses too_many_clauses_exception) { 199 System.out.println(" <Error type=\"TOO_MANY_CLAUSES\"/>"); 200 } 201 202 System.out.println("</ResultSet>"); 203 } 204 204 205 205 206 private static String xmlSafe(String text) {
Note: See TracChangeset for help on using the changeset viewer.