Changeset 12321


Ignore:
Timestamp:
2006-07-28T12:06:09+12:00 (18 years ago)
Author:
kjdon
Message:

Made MAXNUMERIC a global variable instead of a #define. It's now a command-line argument to mgpp_passes, an option in Queryer, and a parameter to ParseQuery.

Location:
trunk
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • trunk/indexers/mgpp/text/GSDLQueryParser.cpp

    r9616 r12321  
    2222#include "GSDLQueryParser.h"
    2323#include "GSDLQueryLex.h"
    24 
     24#include "words.h"
    2525
    2626static QueryNode *ParseExpression (UCArray::const_iterator &here,
     
    509509
    510510QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
    511                int defaultStemMethod) {
     511               int defaultStemMethod, int maxnumeric) {
     512  if (4 < maxnumeric < 512) {
     513    MAXNUMERIC = maxnumeric;
     514  }
    512515  UCArray::const_iterator here = queryStr.begin();
    513516  UCArray::const_iterator end = queryStr.end();
  • trunk/indexers/mgpp/text/GSDLQueryParser.h

    r6118 r12321  
    2929
    3030// returns NULL if the query could not be parsed
    31 // defaultBoolCombine determines if individual terms are combined using
     31// defaultBoolCOmbine determines if individual terms are combined using
    3232// AND or OR in the absence of specified operators.
    3333// defaultStemMethod used to set stemming and casefolding for terms where
     
    3535// should be set using the values from the preferences page
    3636QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
    37                int defaultStemMethod);
     37               int defaultStemMethod, int maxnumeric);
    3838
    3939#endif
  • trunk/indexers/mgpp/text/Queryer.cpp

    r8692 r12321  
    5050       << "\t.c0/.c1\t\tcasefolding off/on\n"
    5151       << "\t.s0/.s1\t\tstemming off/on\n"
    52        << "\t.o0/.o1\t\tshort output off/on\n\n"
     52       << "\t.o0/.o1\t\tshort output off/on\n"
     53       << "\t.m\t\tset maxnumeric (enter the number at the prompt)\n\n"
    5354       << "\t.p\t\tprint a document (enter the docnum at the prompt)\n"
    5455       << "otherwise just enter a query\n\n";
     
    101102  }
    102103
     104  int maxnumeric = 4;
     105
    103106  // debug output
    104107  cerr << "num docs: "<<indexData.bdh.num_docs       
     
    111114  // do querying
    112115  QueryInfo queryInfo;
    113   SetCStr (queryInfo.docLevel, "Document", 8);
     116  SetCStr (queryInfo.docLevel, "Doc", 3);
    114117  queryInfo.maxDocs = 50;
    115118  queryInfo.sortByRank = true;
     
    125128
    126129  UCArray docLevel;
    127   SetCStr(docLevel, "Document", 8);
     130  SetCStr(docLevel, "Doc", 3);
    128131
    129132  UCArray level;
     
    167170      }
    168171
    169 
     172      else if (queryArray[1] == 'm') {
     173    // maxnumeric
     174    int m = 0;
     175    cin >> m;
     176    cin.getline(query, 2048, '\n'); // eat up return
     177    if (4 < m < 512) {
     178      maxnumeric = m;
     179    }
     180      }
    170181      else if (queryArray[1] == 'p') {
    171182    // print
     
    239250    else {
    240251      // regular query
    241       queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod);
     252      queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod, maxnumeric);
    242253      if (queryTree == NULL) {
    243254    cout << "invalid syntax\n";
  • trunk/indexers/mgpp/text/mgpp_passes.cpp

    r9613 r12321  
    6161#include "TextEl.h"
    6262#include "TagInfo.h"
    63 
     63#include "words.h"
    6464#define MAX_PASSES 5
    6565
     
    9999"  %s [-J doc-tag] [-K level-tag] [-L index-level]\n"
    100100"  %*s [-m invf-memory] [-T1] [-T2] [-I1] [-I2] [-S]\n"
    101 "  %*s [-C] [-h] [-d directory] -f name\n\n";
     101"  %*s [-C] [-h] [-d directory] [-M maxnumeric] -f name\n\n";
    102102
    103103
     
    180180
    181181int main (int argc, char **argv) {
    182   int ch, in_fd;
     182  int ch, in_fd, maxnum;
    183183  char *filename = NULL;
    184184  bool compatMode = false;
     
    189189
    190190  opterr = 0;
    191   while ((ch=getopt(argc, argv, "J:K:L:f:d:m:I:T:SCh"))!=-1){
     191  while ((ch=getopt(argc, argv, "J:K:L:M:f:d:m:I:T:SCh"))!=-1){
    192192    switch (ch) {
    193193    case 'J':
     
    199199    case 'L':
    200200      tagInfo.SetIndexLevel (optarg);
     201      break;
     202    case 'M':
     203      maxnum = atoi(optarg);
     204      if (4 < maxnum < 512) {
     205    cout << "setting maxnum to "<<maxnum<<endl;
     206    MAXNUMERIC = maxnum;
     207      }
    201208      break;
    202209    case 'f':
  • trunk/indexers/mgpp/text/words.cpp

    r8692 r12321  
    2727#include "words.h"
    2828
     29int MAXNUMERIC = 4;
    2930
    3031/* Takes the place of the old INAWORD macro. It determines
  • trunk/indexers/mgpp/text/words.h

    r9613 r12321  
    1919 *
    2020 **************************************************************************/
     21#ifndef H_WORDS
     22#define H_WORDS
     23
    2124
    2225#include "sysfuncs.h"
     
    5053       of the program, , i.e., leave MAXSTEMLEN alone... */
    5154
    52 #define MAXNUMERIC  4
     55//#define MAXNUMERIC    4
    5356    /* Maximum number of numeric characters permitted in a word.
    5457       This avoids long sequences of numbers creating just one
    5558       word occurrence for each number. At most 10,000 all numeric
    5659       words will be permitted. */
     60
     61extern  int MAXNUMERIC;
    5762
    5863/* [RPAP - Jan 97: Stem Index Change] */
     
    163168    *(Word) = length;                                              \
    164169  }while(0)
     170
     171#endif
  • trunk/mgpp/text/GSDLQueryParser.cpp

    r9616 r12321  
    2222#include "GSDLQueryParser.h"
    2323#include "GSDLQueryLex.h"
    24 
     24#include "words.h"
    2525
    2626static QueryNode *ParseExpression (UCArray::const_iterator &here,
     
    509509
    510510QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
    511                int defaultStemMethod) {
     511               int defaultStemMethod, int maxnumeric) {
     512  if (4 < maxnumeric < 512) {
     513    MAXNUMERIC = maxnumeric;
     514  }
    512515  UCArray::const_iterator here = queryStr.begin();
    513516  UCArray::const_iterator end = queryStr.end();
  • trunk/mgpp/text/GSDLQueryParser.h

    r6118 r12321  
    2929
    3030// returns NULL if the query could not be parsed
    31 // defaultBoolCombine determines if individual terms are combined using
     31// defaultBoolCOmbine determines if individual terms are combined using
    3232// AND or OR in the absence of specified operators.
    3333// defaultStemMethod used to set stemming and casefolding for terms where
     
    3535// should be set using the values from the preferences page
    3636QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
    37                int defaultStemMethod);
     37               int defaultStemMethod, int maxnumeric);
    3838
    3939#endif
  • trunk/mgpp/text/Queryer.cpp

    r8692 r12321  
    5050       << "\t.c0/.c1\t\tcasefolding off/on\n"
    5151       << "\t.s0/.s1\t\tstemming off/on\n"
    52        << "\t.o0/.o1\t\tshort output off/on\n\n"
     52       << "\t.o0/.o1\t\tshort output off/on\n"
     53       << "\t.m\t\tset maxnumeric (enter the number at the prompt)\n\n"
    5354       << "\t.p\t\tprint a document (enter the docnum at the prompt)\n"
    5455       << "otherwise just enter a query\n\n";
     
    101102  }
    102103
     104  int maxnumeric = 4;
     105
    103106  // debug output
    104107  cerr << "num docs: "<<indexData.bdh.num_docs       
     
    111114  // do querying
    112115  QueryInfo queryInfo;
    113   SetCStr (queryInfo.docLevel, "Document", 8);
     116  SetCStr (queryInfo.docLevel, "Doc", 3);
    114117  queryInfo.maxDocs = 50;
    115118  queryInfo.sortByRank = true;
     
    125128
    126129  UCArray docLevel;
    127   SetCStr(docLevel, "Document", 8);
     130  SetCStr(docLevel, "Doc", 3);
    128131
    129132  UCArray level;
     
    167170      }
    168171
    169 
     172      else if (queryArray[1] == 'm') {
     173    // maxnumeric
     174    int m = 0;
     175    cin >> m;
     176    cin.getline(query, 2048, '\n'); // eat up return
     177    if (4 < m < 512) {
     178      maxnumeric = m;
     179    }
     180      }
    170181      else if (queryArray[1] == 'p') {
    171182    // print
     
    239250    else {
    240251      // regular query
    241       queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod);
     252      queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod, maxnumeric);
    242253      if (queryTree == NULL) {
    243254    cout << "invalid syntax\n";
  • trunk/mgpp/text/mgpp_passes.cpp

    r9613 r12321  
    6161#include "TextEl.h"
    6262#include "TagInfo.h"
    63 
     63#include "words.h"
    6464#define MAX_PASSES 5
    6565
     
    9999"  %s [-J doc-tag] [-K level-tag] [-L index-level]\n"
    100100"  %*s [-m invf-memory] [-T1] [-T2] [-I1] [-I2] [-S]\n"
    101 "  %*s [-C] [-h] [-d directory] -f name\n\n";
     101"  %*s [-C] [-h] [-d directory] [-M maxnumeric] -f name\n\n";
    102102
    103103
     
    180180
    181181int main (int argc, char **argv) {
    182   int ch, in_fd;
     182  int ch, in_fd, maxnum;
    183183  char *filename = NULL;
    184184  bool compatMode = false;
     
    189189
    190190  opterr = 0;
    191   while ((ch=getopt(argc, argv, "J:K:L:f:d:m:I:T:SCh"))!=-1){
     191  while ((ch=getopt(argc, argv, "J:K:L:M:f:d:m:I:T:SCh"))!=-1){
    192192    switch (ch) {
    193193    case 'J':
     
    199199    case 'L':
    200200      tagInfo.SetIndexLevel (optarg);
     201      break;
     202    case 'M':
     203      maxnum = atoi(optarg);
     204      if (4 < maxnum < 512) {
     205    cout << "setting maxnum to "<<maxnum<<endl;
     206    MAXNUMERIC = maxnum;
     207      }
    201208      break;
    202209    case 'f':
  • trunk/mgpp/text/words.cpp

    r8692 r12321  
    2727#include "words.h"
    2828
     29int MAXNUMERIC = 4;
    2930
    3031/* Takes the place of the old INAWORD macro. It determines
  • trunk/mgpp/text/words.h

    r9613 r12321  
    1919 *
    2020 **************************************************************************/
     21#ifndef H_WORDS
     22#define H_WORDS
     23
    2124
    2225#include "sysfuncs.h"
     
    5053       of the program, , i.e., leave MAXSTEMLEN alone... */
    5154
    52 #define MAXNUMERIC  4
     55//#define MAXNUMERIC    4
    5356    /* Maximum number of numeric characters permitted in a word.
    5457       This avoids long sequences of numbers creating just one
    5558       word occurrence for each number. At most 10,000 all numeric
    5659       words will be permitted. */
     60
     61extern  int MAXNUMERIC;
    5762
    5863/* [RPAP - Jan 97: Stem Index Change] */
     
    163168    *(Word) = length;                                              \
    164169  }while(0)
     170
     171#endif
Note: See TracChangeset for help on using the changeset viewer.