Context Navigation

← Previous Changeset
Next Changeset →

Changeset 12772

Timestamp:

2006-09-18T15:56:03+12:00 (18 years ago)

Author:

kjdon

Message:

Put the { of an if, while or else statement on the same line as the keyword (where it belongs) and re-indented the code so that we no longer have a ridiculous amount of tab whitespace. Ah, doesn't that look better...

Location:

trunk

Files:

2 edited

gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java (modified) (9 diffs)
indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java (modified) (9 diffs)

Legend:

Unmodified
Added
Removed

trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

-              r12770
+              r12772
             // definable
             String default_conjuction_operator = "OR";
+            for (int i = 1; i < args.length; i++)
+                {
+                    if (args[i].equals("-sort"))
+                        {
+                            i++;
+                            sorter = new Sort(args[i]);
+                        }
+                    if (args[i].equals("-filter"))
+                        {
+                            i++;
+                // Parse up filter
+                filter = parseFilterString(args[i]);
+                        }
+                    if (args[i].equals("-dco"))
+                        {
+                            i++;
+                            default_conjuction_operator = args[i];
+                        }
+                    if (args[i].equals("-fuzziness"))
+                        {
+                i++;
+                fuzziness = args[i];
+                        }
+            if (args[i].equals("-startresults"))
+            {
+                i++;
+                if (args[i].matches("\\d+"))
+                {
+                    start_results = Integer.parseInt(args[i]);
+                }
+            }
+            if (args[i].equals("-endresults"))
+            {
+                i++;
+                if (args[i].matches("\\d+"))
+                {
+                    end_results = Integer.parseInt(args[i]);
+                }
+            }
+                }
+            for (int i = 1; i < args.length; i++) {
+        if (args[i].equals("-sort")) {
+            i++;
+            sorter = new Sort(args[i]);
+        }
+        if (args[i].equals("-filter")) {
+            i++;
+            // Parse up filter
+            filter = parseFilterString(args[i]);
+        }
+        if (args[i].equals("-dco")) {
+            i++;
+            default_conjuction_operator = args[i];
+        }
+        if (args[i].equals("-fuzziness")) {
+            i++;
+            fuzziness = args[i];
+        }
+        if (args[i].equals("-startresults")) {
+            i++;
+            if (args[i].matches("\\d+")) {
+            start_results = Integer.parseInt(args[i]);
+            }
+        }
+        if (args[i].equals("-endresults")) {
+            i++;
+            if (args[i].matches("\\d+")) {
+            end_results = Integer.parseInt(args[i]);
+            }
+        }
+        }
         // Lucene does "OR" queries by default; do an "AND" query if specified
+        if (default_conjuction_operator.equals("AND"))
+                {
+                    query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
+                    query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
+                }
+        if (default_conjuction_operator.equals("AND")) {
+        query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
+        query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
+        }
         BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
 …
         System.out.println("<ResultSet>");
         System.out.println("  <QueryString>" + query_string + "</QueryString>");
+        if (filter != null)
+            {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+            }
+        if (filter != null) {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+        }
         try {
 …
             query.extractTerms(terms);
             Iterator iter = terms.iterator();
+            while (iter.hasNext())
+            {
+                Term term = (Term) iter.next();
+                // If you wanted to limit this to just TX terms add
+                // something like this:
+                //if (term.field().equals("TX"))
+                term_counts.put(term.text(), new Integer(0));
+                term_fields.put(term.text(), term.field());
+            }
+            while (iter.hasNext()) {
+            Term term = (Term) iter.next();
+            // If you wanted to limit this to just TX terms add
+            // something like this:
+            //if (term.field().equals("TX"))
+            term_counts.put(term.text(), new Integer(0));
+            term_fields.put(term.text(), term.field());
+            }
             // Do we need to use a hit iterator to get sorted results?
 …
             int counter = 1;
             Iterator hit_iter = hits.iterator();
+            while (hit_iter.hasNext())
+            {
+                Hit hit = (Hit) hit_iter.next();
+                Document doc = hit.getDocument();
+                String node_id = doc.get("nodeID");
+                // May not be paging results
+                if (start_results == 1 && end_results == -1)
+                {
+                    System.out.println("  <Match id=\"" + node_id + "\" />");
+            while (hit_iter.hasNext()) {
+            Hit hit = (Hit) hit_iter.next();
+            Document doc = hit.getDocument();
+            String node_id = doc.get("nodeID");
+            // May not be paging results
+            if (start_results == 1 && end_results == -1) {
+                System.out.println("  <Match id=\"" + node_id + "\" />");
+            }
+            // Otherwise skip up until page offset
+            else if (start_results <= counter && counter <= end_results) {
+                System.out.println("  <Match id=\"" + node_id + "\" />");
+            }
+            // And skip all the rest
+            // From the document, extract the Term Vector for the
+            // TX field
+            TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
+            if (term_freq_vector != null && term_freq_vector.size() > 0) {
+                int[] term_frequencies = term_freq_vector.getTermFrequencies();
+                // Now for each query term, determine the
+                // frequency - which may of course be 0.
+                Set term_counts_set = term_counts.keySet();
+                Iterator terms_iter = term_counts_set.iterator();
+                while (terms_iter.hasNext()) {
+                String term = (String) terms_iter.next();
+                Integer count_integer = (Integer) term_counts.get(term);
+                int count = count_integer.intValue();
+                int index = term_freq_vector.indexOf(term);
+                // If the term has a count, then add to
+                // the total count for this term
+                if (index != -1) {
+                    count += term_frequencies[index];
+                }
+                // Otherwise skip up until page offset
+                else if (start_results <= counter && counter <= end_results)
+                {
+                    System.out.println("  <Match id=\"" + node_id + "\" />");
+                }
+                // And skip all the rest
+                // From the document, extract the Term Vector for the
+                // TX field
+                TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
+                if (term_freq_vector != null && term_freq_vector.size() > 0)
+                {
+                    int[] term_frequencies = term_freq_vector.getTermFrequencies();
+                    // Now for each query term, determine the
+                    // frequency - which may of course be 0.
+                    Set term_counts_set = term_counts.keySet();
+                    Iterator terms_iter = term_counts_set.iterator();
+                    while (terms_iter.hasNext())
+                    {
+                        String term = (String) terms_iter.next();
+                        Integer count_integer = (Integer) term_counts.get(term);
+                        int count = count_integer.intValue();
+                        int index = term_freq_vector.indexOf(term);
+                        // If the term has a count, then add to
+                        // the total count for this term
+                        if (index != -1)
+                        {
+                            count += term_frequencies[index];
+                        }
+                        // Store the result
+                        term_counts.put(term, new Integer(count));
+                        count_integer = null;
+                        term = null;
+                    }
+                    terms_iter = null;
+                    term_counts_set = null;
+                }
+                else
+                {
+                    ///ystem.err.println("Error! Missing term vector for document " + hit.getId());
+                }
+                ++counter;
+            }
+                // Store the result
+                term_counts.put(term, new Integer(count));
+                count_integer = null;
+                term = null;
+                }
+                terms_iter = null;
+                term_counts_set = null;
+            }
+            else {
+                ///ystem.err.println("Error! Missing term vector for document " + hit.getId());
+            }
+            ++counter;
+            }
             // Retrieve all the useful terms
             Set term_counts_set = term_counts.keySet();
 …
             // Iterate over them
             Iterator terms_iter = term_counts_set.iterator();
+            while (terms_iter.hasNext())
+            {
+                String term = (String) terms_iter.next();
+                Integer count = (Integer) term_counts.get(term);
+                String field = (String) term_fields.get(term);
+                System.out.println("  <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />");
+                count = null;
+                term = null;
+            }
+            while (terms_iter.hasNext()) {
+            String term = (String) terms_iter.next();
+            Integer count = (Integer) term_counts.get(term);
+            String field = (String) term_fields.get(term);
+            System.out.println("  <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />");
+            count = null;
+            term = null;
+            }
             // Cleanup
             terms_iter = null;
 …
     int paren_count = 0;
     boolean seen_paren = false;
+    while (offset < query_string.length() && (!seen_paren || paren_count > 0))
+        {
+        if (query_string.charAt(offset) == '(')
+            {
+            paren_count++;
+            seen_paren = true;
+            }
+        if (query_string.charAt(offset) == ')')
+            {
+            paren_count--;
+            }
+        offset++;
+        }
+    while (offset < query_string.length() && (!seen_paren || paren_count > 0)) {
+        if (query_string.charAt(offset) == '(') {
+        paren_count++;
+        seen_paren = true;
+        }
+        if (query_string.charAt(offset) == ')') {
+        paren_count--;
+        }
+        offset++;
+    }
     String query_prefix = query_string.substring(0, offset);
     String query_suffix = query_string.substring(offset);
 …
     // If this is a fuzzy search, then we need to add the fuzzy
     // flag to each of the query terms
+    if (fuzziness != null && query.toString().length() > 0)
+        {
+        // Revert the query to a string
+        System.err.println("Rewritten query: " + query.toString());
+        // Search through the string for TX:<term> query terms
+        // and append the ~ operator. Not that this search will
+        // not change phrase searches (TX:"<term> <term>") as
+        // fuzzy searching is not possible for these entries.
+        // Yahoo! Time for a state machine!
+        StringBuffer mutable_query_string = new StringBuffer(query.toString());
+        int o = 0; // Offset
+        // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:
+        int s = 0; // State
+        while(o < mutable_query_string.length())
+            {
+            char c = mutable_query_string.charAt(o);
+            if (s == 0 && c == 'T')
+                {
+                ///ystem.err.println("Found T!");
+                s = 1;
+                }
+            else if (s == 1)
+                {
+                if (c == 'X')
+                    {
+                    ///ystem.err.println("Found X!");
+                    s = 2;
+                    }
+                else
+                    {
+                    s = 0; // Reset
+                    }
+                }
+            else if (s == 2)
+                {
+                if (c == ':')
+                    {
+                    ///ystem.err.println("Found TX:!");
+                    s = 3;
+                    }
+                else
+                    {
+                    s = 0; // Reset
+                    }
+                }
+            else if (s == 3)
+                {
+                // Don't process phrases
+                if (c == '"')
+                    {
+                    ///ystem.err.println("Stupid phrase...");
+                    s = 0; // Reset
+                    }
+                // Found the end of the term... add the
+                // fuzzy search indicator
+                // Nor outside the scope of parentheses
+                else if (Character.isWhitespace(c) || c == ')')
+                    {
+                    ///ystem.err.println("Yahoo! Found fuzzy term.");
+                    mutable_query_string.insert(o, '~' + fuzziness);
+                    o++;
+                    s = 0; // Reset
+                    }
+                }
+    if (fuzziness != null && query.toString().length() > 0) {
+        // Revert the query to a string
+        System.err.println("Rewritten query: " + query.toString());
+        // Search through the string for TX:<term> query terms
+        // and append the ~ operator. Not that this search will
+        // not change phrase searches (TX:"<term> <term>") as
+        // fuzzy searching is not possible for these entries.
+        // Yahoo! Time for a state machine!
+        StringBuffer mutable_query_string = new StringBuffer(query.toString());
+        int o = 0; // Offset
+        // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:
+        int s = 0; // State
+        while (o < mutable_query_string.length()) {
+        char c = mutable_query_string.charAt(o);
+        if (s == 0 && c == 'T') {
+            ///ystem.err.println("Found T!");
+            s = 1;
+        }
+        else if (s == 1) {
+            if (c == 'X') {
+            ///ystem.err.println("Found X!");
+            s = 2;
+            }
+            else {
+            s = 0; // Reset
+            }
+        }
+        else if (s == 2) {
+            if (c == ':') {
+            ///ystem.err.println("Found TX:!");
+            s = 3;
+            }
+            else {
+            s = 0; // Reset
+            }
+        }
+        else if (s == 3) {
+            // Don't process phrases
+            if (c == '"') {
+            ///ystem.err.println("Stupid phrase...");
+            s = 0; // Reset
+            }
+            // Found the end of the term... add the
+            // fuzzy search indicator
+            // Nor outside the scope of parentheses
+            else if (Character.isWhitespace(c) || c == ')') {
+            ///ystem.err.println("Yahoo! Found fuzzy term.");
+            mutable_query_string.insert(o, '~' + fuzziness);
             o++;
+            }
+        // If we were in the state of looking for the end of a
+        // term - then we just found it!
+        if (s == 3)
+            {
+            mutable_query_string.append('~' + fuzziness);
+            }
+        // Reparse the query
+        ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);
+        query = query_parser.parse(mutable_query_string.toString() + query_suffix);
+        }
+    else
+        {
+        query = query_parser.parse(query_prefix + query_suffix);
+        }
+            s = 0; // Reset
+            }
+        }
+        o++;
+        }
+        // If we were in the state of looking for the end of a
+        // term - then we just found it!
+        if (s == 3) {
+        mutable_query_string.append('~' + fuzziness);
+        }
+        // Reparse the query
+        ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);
+        query = query_parser.parse(mutable_query_string.toString() + query_suffix);
+    }
+    else {
+        query = query_parser.parse(query_prefix + query_suffix);
+    }
     return query;
+    }
 …
     Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*");
     Matcher matcher = pattern.matcher(filter_string);
+    if (matcher.matches())
+      {
+    if (matcher.matches()) {
         String field_name = matcher.group(1);
         boolean include_lower = matcher.group(2).equals("[");
 …
         boolean include_upper = matcher.group(5).equals("]");
         result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
+      }
+    else
+      {
+    }
+    else {
         System.err.println("Error: Could not understand filter string \"" + filter_string + "\"");
+      }
+    }
     return result;
+  }

trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

-              r12770
+              r12772
             // definable
             String default_conjuction_operator = "OR";
+            for (int i = 1; i < args.length; i++)
+                {
+                    if (args[i].equals("-sort"))
+                        {
+                            i++;
+                            sorter = new Sort(args[i]);
+                        }
+                    if (args[i].equals("-filter"))
+                        {
+                            i++;
+                // Parse up filter
+                filter = parseFilterString(args[i]);
+                        }
+                    if (args[i].equals("-dco"))
+                        {
+                            i++;
+                            default_conjuction_operator = args[i];
+                        }
+                    if (args[i].equals("-fuzziness"))
+                        {
+                i++;
+                fuzziness = args[i];
+                        }
+            if (args[i].equals("-startresults"))
+            {
+                i++;
+                if (args[i].matches("\\d+"))
+                {
+                    start_results = Integer.parseInt(args[i]);
+                }
+            }
+            if (args[i].equals("-endresults"))
+            {
+                i++;
+                if (args[i].matches("\\d+"))
+                {
+                    end_results = Integer.parseInt(args[i]);
+                }
+            }
+                }
+            for (int i = 1; i < args.length; i++) {
+        if (args[i].equals("-sort")) {
+            i++;
+            sorter = new Sort(args[i]);
+        }
+        if (args[i].equals("-filter")) {
+            i++;
+            // Parse up filter
+            filter = parseFilterString(args[i]);
+        }
+        if (args[i].equals("-dco")) {
+            i++;
+            default_conjuction_operator = args[i];
+        }
+        if (args[i].equals("-fuzziness")) {
+            i++;
+            fuzziness = args[i];
+        }
+        if (args[i].equals("-startresults")) {
+            i++;
+            if (args[i].matches("\\d+")) {
+            start_results = Integer.parseInt(args[i]);
+            }
+        }
+        if (args[i].equals("-endresults")) {
+            i++;
+            if (args[i].matches("\\d+")) {
+            end_results = Integer.parseInt(args[i]);
+            }
+        }
+        }
         // Lucene does "OR" queries by default; do an "AND" query if specified
+        if (default_conjuction_operator.equals("AND"))
+                {
+                    query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
+                    query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
+                }
+        if (default_conjuction_operator.equals("AND")) {
+        query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
+        query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
+        }
         BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
 …
         System.out.println("<ResultSet>");
         System.out.println("  <QueryString>" + query_string + "</QueryString>");
+        if (filter != null)
+            {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+            }
+        if (filter != null) {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+        }
         try {
 …
             query.extractTerms(terms);
             Iterator iter = terms.iterator();
+            while (iter.hasNext())
+            {
+                Term term = (Term) iter.next();
+                // If you wanted to limit this to just TX terms add
+                // something like this:
+                //if (term.field().equals("TX"))
+                term_counts.put(term.text(), new Integer(0));
+                term_fields.put(term.text(), term.field());
+            }
+            while (iter.hasNext()) {
+            Term term = (Term) iter.next();
+            // If you wanted to limit this to just TX terms add
+            // something like this:
+            //if (term.field().equals("TX"))
+            term_counts.put(term.text(), new Integer(0));
+            term_fields.put(term.text(), term.field());
+            }
             // Do we need to use a hit iterator to get sorted results?
 …
             int counter = 1;
             Iterator hit_iter = hits.iterator();
+            while (hit_iter.hasNext())
+            {
+                Hit hit = (Hit) hit_iter.next();
+                Document doc = hit.getDocument();
+                String node_id = doc.get("nodeID");
+                // May not be paging results
+                if (start_results == 1 && end_results == -1)
+                {
+                    System.out.println("  <Match id=\"" + node_id + "\" />");
+            while (hit_iter.hasNext()) {
+            Hit hit = (Hit) hit_iter.next();
+            Document doc = hit.getDocument();
+            String node_id = doc.get("nodeID");
+            // May not be paging results
+            if (start_results == 1 && end_results == -1) {
+                System.out.println("  <Match id=\"" + node_id + "\" />");
+            }
+            // Otherwise skip up until page offset
+            else if (start_results <= counter && counter <= end_results) {
+                System.out.println("  <Match id=\"" + node_id + "\" />");
+            }
+            // And skip all the rest
+            // From the document, extract the Term Vector for the
+            // TX field
+            TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
+            if (term_freq_vector != null && term_freq_vector.size() > 0) {
+                int[] term_frequencies = term_freq_vector.getTermFrequencies();
+                // Now for each query term, determine the
+                // frequency - which may of course be 0.
+                Set term_counts_set = term_counts.keySet();
+                Iterator terms_iter = term_counts_set.iterator();
+                while (terms_iter.hasNext()) {
+                String term = (String) terms_iter.next();
+                Integer count_integer = (Integer) term_counts.get(term);
+                int count = count_integer.intValue();
+                int index = term_freq_vector.indexOf(term);
+                // If the term has a count, then add to
+                // the total count for this term
+                if (index != -1) {
+                    count += term_frequencies[index];
+                }
+                // Otherwise skip up until page offset
+                else if (start_results <= counter && counter <= end_results)
+                {
+                    System.out.println("  <Match id=\"" + node_id + "\" />");
+                }
+                // And skip all the rest
+                // From the document, extract the Term Vector for the
+                // TX field
+                TermFreqVector term_freq_vector = reader.getTermFreqVector(hit.getId(), "TX");
+                if (term_freq_vector != null && term_freq_vector.size() > 0)
+                {
+                    int[] term_frequencies = term_freq_vector.getTermFrequencies();
+                    // Now for each query term, determine the
+                    // frequency - which may of course be 0.
+                    Set term_counts_set = term_counts.keySet();
+                    Iterator terms_iter = term_counts_set.iterator();
+                    while (terms_iter.hasNext())
+                    {
+                        String term = (String) terms_iter.next();
+                        Integer count_integer = (Integer) term_counts.get(term);
+                        int count = count_integer.intValue();
+                        int index = term_freq_vector.indexOf(term);
+                        // If the term has a count, then add to
+                        // the total count for this term
+                        if (index != -1)
+                        {
+                            count += term_frequencies[index];
+                        }
+                        // Store the result
+                        term_counts.put(term, new Integer(count));
+                        count_integer = null;
+                        term = null;
+                    }
+                    terms_iter = null;
+                    term_counts_set = null;
+                }
+                else
+                {
+                    ///ystem.err.println("Error! Missing term vector for document " + hit.getId());
+                }
+                ++counter;
+            }
+                // Store the result
+                term_counts.put(term, new Integer(count));
+                count_integer = null;
+                term = null;
+                }
+                terms_iter = null;
+                term_counts_set = null;
+            }
+            else {
+                ///ystem.err.println("Error! Missing term vector for document " + hit.getId());
+            }
+            ++counter;
+            }
             // Retrieve all the useful terms
             Set term_counts_set = term_counts.keySet();
 …
             // Iterate over them
             Iterator terms_iter = term_counts_set.iterator();
+            while (terms_iter.hasNext())
+            {
+                String term = (String) terms_iter.next();
+                Integer count = (Integer) term_counts.get(term);
+                String field = (String) term_fields.get(term);
+                System.out.println("  <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />");
+                count = null;
+                term = null;
+            }
+            while (terms_iter.hasNext()) {
+            String term = (String) terms_iter.next();
+            Integer count = (Integer) term_counts.get(term);
+            String field = (String) term_fields.get(term);
+            System.out.println("  <Term value=\"" + term + "\" field=\"" + field + "\" freq=\"" + count.intValue() + "\" />");
+            count = null;
+            term = null;
+            }
             // Cleanup
             terms_iter = null;
 …
     int paren_count = 0;
     boolean seen_paren = false;
+    while (offset < query_string.length() && (!seen_paren || paren_count > 0))
+        {
+        if (query_string.charAt(offset) == '(')
+            {
+            paren_count++;
+            seen_paren = true;
+            }
+        if (query_string.charAt(offset) == ')')
+            {
+            paren_count--;
+            }
+        offset++;
+        }
+    while (offset < query_string.length() && (!seen_paren || paren_count > 0)) {
+        if (query_string.charAt(offset) == '(') {
+        paren_count++;
+        seen_paren = true;
+        }
+        if (query_string.charAt(offset) == ')') {
+        paren_count--;
+        }
+        offset++;
+    }
     String query_prefix = query_string.substring(0, offset);
     String query_suffix = query_string.substring(offset);
 …
     // If this is a fuzzy search, then we need to add the fuzzy
     // flag to each of the query terms
+    if (fuzziness != null && query.toString().length() > 0)
+        {
+        // Revert the query to a string
+        System.err.println("Rewritten query: " + query.toString());
+        // Search through the string for TX:<term> query terms
+        // and append the ~ operator. Not that this search will
+        // not change phrase searches (TX:"<term> <term>") as
+        // fuzzy searching is not possible for these entries.
+        // Yahoo! Time for a state machine!
+        StringBuffer mutable_query_string = new StringBuffer(query.toString());
+        int o = 0; // Offset
+        // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:
+        int s = 0; // State
+        while(o < mutable_query_string.length())
+            {
+            char c = mutable_query_string.charAt(o);
+            if (s == 0 && c == 'T')
+                {
+                ///ystem.err.println("Found T!");
+                s = 1;
+                }
+            else if (s == 1)
+                {
+                if (c == 'X')
+                    {
+                    ///ystem.err.println("Found X!");
+                    s = 2;
+                    }
+                else
+                    {
+                    s = 0; // Reset
+                    }
+                }
+            else if (s == 2)
+                {
+                if (c == ':')
+                    {
+                    ///ystem.err.println("Found TX:!");
+                    s = 3;
+                    }
+                else
+                    {
+                    s = 0; // Reset
+                    }
+                }
+            else if (s == 3)
+                {
+                // Don't process phrases
+                if (c == '"')
+                    {
+                    ///ystem.err.println("Stupid phrase...");
+                    s = 0; // Reset
+                    }
+                // Found the end of the term... add the
+                // fuzzy search indicator
+                // Nor outside the scope of parentheses
+                else if (Character.isWhitespace(c) || c == ')')
+                    {
+                    ///ystem.err.println("Yahoo! Found fuzzy term.");
+                    mutable_query_string.insert(o, '~' + fuzziness);
+                    o++;
+                    s = 0; // Reset
+                    }
+                }
+    if (fuzziness != null && query.toString().length() > 0) {
+        // Revert the query to a string
+        System.err.println("Rewritten query: " + query.toString());
+        // Search through the string for TX:<term> query terms
+        // and append the ~ operator. Not that this search will
+        // not change phrase searches (TX:"<term> <term>") as
+        // fuzzy searching is not possible for these entries.
+        // Yahoo! Time for a state machine!
+        StringBuffer mutable_query_string = new StringBuffer(query.toString());
+        int o = 0; // Offset
+        // 0 = BASE, 1 = SEEN_T, 2 = SEEN_TX, 3 = SEEN_TX:
+        int s = 0; // State
+        while (o < mutable_query_string.length()) {
+        char c = mutable_query_string.charAt(o);
+        if (s == 0 && c == 'T') {
+            ///ystem.err.println("Found T!");
+            s = 1;
+        }
+        else if (s == 1) {
+            if (c == 'X') {
+            ///ystem.err.println("Found X!");
+            s = 2;
+            }
+            else {
+            s = 0; // Reset
+            }
+        }
+        else if (s == 2) {
+            if (c == ':') {
+            ///ystem.err.println("Found TX:!");
+            s = 3;
+            }
+            else {
+            s = 0; // Reset
+            }
+        }
+        else if (s == 3) {
+            // Don't process phrases
+            if (c == '"') {
+            ///ystem.err.println("Stupid phrase...");
+            s = 0; // Reset
+            }
+            // Found the end of the term... add the
+            // fuzzy search indicator
+            // Nor outside the scope of parentheses
+            else if (Character.isWhitespace(c) || c == ')') {
+            ///ystem.err.println("Yahoo! Found fuzzy term.");
+            mutable_query_string.insert(o, '~' + fuzziness);
             o++;
+            }
+        // If we were in the state of looking for the end of a
+        // term - then we just found it!
+        if (s == 3)
+            {
+            mutable_query_string.append('~' + fuzziness);
+            }
+        // Reparse the query
+        ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);
+        query = query_parser.parse(mutable_query_string.toString() + query_suffix);
+        }
+    else
+        {
+        query = query_parser.parse(query_prefix + query_suffix);
+        }
+            s = 0; // Reset
+            }
+        }
+        o++;
+        }
+        // If we were in the state of looking for the end of a
+        // term - then we just found it!
+        if (s == 3) {
+        mutable_query_string.append('~' + fuzziness);
+        }
+        // Reparse the query
+        ///ystem.err.println("Fuzzy query: " + mutable_query_string.toString() + query_suffix);
+        query = query_parser.parse(mutable_query_string.toString() + query_suffix);
+    }
+    else {
+        query = query_parser.parse(query_prefix + query_suffix);
+    }
     return query;
+    }
 …
     Pattern pattern = Pattern.compile("\\s*\\+(\\w+)\\:([\\{\\[])(\\d+)\\s+TO\\s+(\\d+)([\\}\\]])\\s*");
     Matcher matcher = pattern.matcher(filter_string);
+    if (matcher.matches())
+      {
+    if (matcher.matches()) {
         String field_name = matcher.group(1);
         boolean include_lower = matcher.group(2).equals("[");
 …
         boolean include_upper = matcher.group(5).equals("]");
         result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
+      }
+    else
+      {
+    }
+    else {
         System.err.println("Error: Could not understand filter string \"" + filter_string + "\"");
+      }
+    }
     return result;
+  }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 12772

Legend:

trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

Download in other formats: