Ignore:
Timestamp:
2006-12-11T11:22:20+13:00 (17 years ago)
Author:
shaoqun
Message:

Added code for accent folding.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/indexers/mgpp/text/mgpp_stem_idx.cpp

    r9613 r13477  
    8080
    8181    // stem the word
    82     stemmer (stemMethod, stemmerNum, mgWord);
     82    mgpp_stemmer (stemMethod, stemmerNum, mgWord);
    8383
    8484    // convert the result back to a UCArray
     
    101101            int stemmerNum,
    102102            unsigned long entriesPerBlock) {
     103 
     104  /* [JFG - Mar 06: Accent folding patch] */
    103105  // Create appropriate stem index file
    104106  FILE *stemDictFile = NULL;
    105   if (stemMethod == 1) {
    106     stemDictFile = create_file (filename, INVF_DICT_BLOCKED_1_SUFFIX,
    107                 "wb", MAGIC_STEM_1, MG_ABORT);
    108   } else if (stemMethod == 2) {
    109     stemDictFile = create_file (filename, INVF_DICT_BLOCKED_2_SUFFIX,
    110                 "wb", MAGIC_STEM_2, MG_ABORT);
    111   } else if (stemMethod == 3) {
    112     stemDictFile = create_file (filename, INVF_DICT_BLOCKED_3_SUFFIX,
    113                 "wb", MAGIC_STEM_3, MG_ABORT);
    114   } else {
     107  if (stemMethod >= STEM_MIN && stemMethod <= STEM_MAX) {
     108    char *suffix = make_suffix (INVF_DICT_BLOCKED_SUFFIX_PAT, stemMethod, NULL);
     109    stemDictFile = create_file (filename, suffix,
     110                "wb", MAGIC_STEM_GEN(stemMethod + '0'), MG_ABORT); 
     111  }
     112  else {
    115113    FatalError (1, "Unknown stem method %d", stemMethod);
    116114  }
     
    213211      break;
    214212    case 'a':
    215       stemmerNum = stemmernumber ((unsigned char *) optarg);
     213      stemmerNum = mgpp_stemmernumber ((unsigned char *) optarg);
    216214      break;
    217215    case 'h':
    218216    case '?':
    219217      fprintf (stderr, "usage: %s [-d directory] "
    220            "[-b entries-per-block] [-h] -s 1|2|3 "
    221            "[-a stemmer-method] -f name\n", argv[0]);
     218           "[-b entries-per-block] [-h] -s 1|2|3", argv[0]);
     219#ifdef ENABLE_ACCENTFOLD
     220      fprintf (stderr, "|4|5|6|7");
     221#endif
     222      fprintf (stderr, " [-a stemmer-method] -f name\n");
    222223      exit (1);
    223224    }
    224225  }
    225226 
    226   if (stemMethod < 1 || stemMethod > 3)
    227     FatalError (1, "Stem method must be 1, 2 or 3");
    228 
     227  /* [JFG - Mar 06: Accent folding patch] */
     228  if (stemMethod < STEM_MIN || stemMethod > STEM_MAX)
     229    FatalError (1, "Stem method must be between %d and %d", STEM_MIN, STEM_MAX);
     230#ifndef ENABLE_ACCENTFOLD
     231  if (stemMethod & STEM_AccentFolding) {
     232    // accent folding not enabled
     233    return -1;
     234  }
     235#endif
    229236  // read in the dictionary and create the in memory dictionary
    230237  StemMapDict stemDict;
Note: See TracChangeset for help on using the changeset viewer.