Ignore:
Timestamp:
2004-05-27T13:50:24+12:00 (20 years ago)
Author:
kjdon
Message:

adding more options to mgpasses, tidying up

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/packages/mg/src/text/mg_passes_4jni.c

    r7455 r7460  
    11/**************************************************************************
    22 *
    3  * mg_passes.c -- Driver for the various passes
     3 * mg_passes_4jni.c -- Driver for the various passes
    44 * Copyright (C) 1994  Neil Sharman
    55 *
     
    3535#include "stemmer.h"
    3636
    37 
    3837#include "mg_files.h"
    3938#include "mg.h"
     
    113112};
    114113
    115 static char *usage_str = "\nUSAGE:\n"
    116 "  %s [-h] [-G] [-D] [-1|-2|-3] [-T1] [-T2] [-I1] [-I2] [-N1]\n"
    117 "  %*s [-N2] [-W] [-S] [-b buffer-size] [-d dictionary-directory]\n"
    118 "  %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n"
    119 "  %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n"
    120 "  %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n";
    121 
    122 
    123 static void
    124 usage (char *err)
    125 {
    126   if (err)
    127     Message (err);
    128   fprintf (stderr, usage_str, msg_prefix, strlen (msg_prefix), "",
    129        strlen (msg_prefix), "",strlen (msg_prefix), "",
    130        strlen (msg_prefix),"");
    131   exit (1);
    132 }
    133 
    134 
    135 
    136 
    137 #if 0
    /*
     * str_comma -- format U in decimal with a comma between each of the three
     * lowest groups of three digits, e.g. 1234567 -> "1,234,567".  Everything
     * from the billions upward is printed as a single leading group.
     *
     * Returns a pointer to a static buffer that the next call overwrites.
     */
    static char *
    str_comma (unsigned long u)
    {
      /* A 64-bit value needs up to 24 bytes ("18446744073,709,551,615" + NUL). */
      static char buf[32];
      unsigned long billions = u / 1000000000UL;
      unsigned long millions = u / 1000000UL % 1000UL;
      unsigned long thousands = u / 1000UL % 1000UL;
      unsigned long units = u % 1000UL;

      if (billions)
        snprintf (buf, sizeof buf, "%lu,%03lu,%03lu,%03lu",
                  billions, millions, thousands, units);
      else if (millions)
        snprintf (buf, sizeof buf, "%lu,%03lu,%03lu", millions, thousands, units);
      else if (thousands)
        snprintf (buf, sizeof buf, "%lu,%03lu", thousands, units);
      else
        snprintf (buf, sizeof buf, "%lu", units);
      return (buf);
    }
    161 #endif
    162 
    163 
    164 
    165 /*
    166   int
    167   open_next_file (int in_fd)
    168   {
    169   if (in_fd > 0)
    170   close (in_fd);
    171   if (num_files == 0)
    172   return (-1);
    173   if ((in_fd = open (files[0], O_RDONLY)) == -1)
    174   FatalError (1, "Cannot open %s", files[0]);
    175   files++;
    176   num_files--;
    177   return (in_fd);
    178   }
    179 */
    180 
     114
     115/* clear all the settings from one mg_passes run to the next */
    181116void clear_variables() {
    182117
     
    207142 
    208143}
    209 void set_invf_level(char level) {
    210 
    211   switch (level) {
    212   case '1':
    213     InvfLevel = 1;
    214     break;
    215   case '2':
    216     InvfLevel = 2;
    217     break;
    218   case '3':
    219     InvfLevel = 3;
    220     break;
    221   }
    222 
    223 }
    224 void set_inversion_limit(int limit) {
    225   invf_buffer_size = limit * 1024 * 1024;
    226 }
    227 
    228 void ignore_sgml_tags(int ignore) {
    229   if (ignore) {
    230     SkipSGML = 1;
    231   } else {
    232     SkipSGML = 0;
    233   }
    234 }
    235 
    236 void set_buffer_size(long size) {
    237   buf_size = size * 1024;
    238   if (buf_size <  MIN_BUF) {
    239     buf_size = MIN_BUF;
    240   }
    241 }
    242 
    243 void set_stem_options(char * stemmer, int method) {
    244   stemmer_num = stemmernumber (stemmer);
    245   stem_method = method & STEMMER_MASK;
    246 
    247 }
    248 
    249 void set_filename(char * filen) {
    250   int len = strlen(filen);
    251   if (filename) {
    252     Xfree (filename);
    253     filename = NULL;
    254   }
    255   filename = Xstrdup (filen);
    256   // put this here for now
    257   Dump=1;
    258   trace = 512;
    259   if (!trace_name)
    260     trace_name = make_name (filename, TRACE_SUFFIX, NULL);
    261   if (!(Trace = fopen (trace_name, "a")))
    262     Message ("Unable to open \"%s\". No tracing will be done.", trace_name);
    263   else
    264     setbuf (Trace, NULL);
    265  
    266 }
    267 
    268 
     144
     145/* ################################################## */
     146/* the following are methods to set all the variables that used to be
     147   set by command line args */
     148
     149/* -S, -T1, -T2, -I1, -I2, args to mg_passes */
    269150void add_pass (char pass_type, char pass_num) {
    270151
     
    291172    break;
    292173  }
    293 
    294 }
     174}
     175
     176/* -D arg to mg_passes */
     177void dump_failed_document(int dump) {
     178  Dump = dump;
     179}
     180
     181/* -G arg to mg_passes */
     182void ignore_sgml_tags(int ignore) {
     183  if (ignore) {
     184    SkipSGML = 1;
     185  } else {
     186    SkipSGML = 0;
     187  }
     188}
     189
     190/* -b arg to mg_passes */
     191void set_buffer_size(long size) {
     192  buf_size = size * 1024;
     193  if (buf_size <  MIN_BUF) {
     194    buf_size = MIN_BUF;
     195  }
     196}
     197
     198/* -c arg to mg_passes */
     199void set_chunk_limit(long chunk_limit) {
     200  ChunkLimit = chunk_limit;
     201}
     202
     203/* -C arg to mg_passes */
     204void set_comp_stat_point(int stat_point) {
     205  comp_stat_point = stat_point * 1024;
     206}
     207
     208/* -f arg to mg_passes */
     209void set_filename(char * filen) {
     210  int len = strlen(filen);
     211  if (filename) {
     212    Xfree (filename);
     213    filename = NULL;
     214  }
     215  filename = Xstrdup (filen); 
     216}
     217
     218/* -m arg to mg_passes */
     219void set_inversion_limit(int limit) {
     220  invf_buffer_size = limit * 1024 * 1024;
     221}
     222
     223/* -1, -2, -3 args to mg_passes */
     224void set_invf_level(char level) {
     225  switch (level) {
     226  case '1':
     227    InvfLevel = 1;
     228    break;
     229  case '2':
     230    InvfLevel = 2;
     231    break;
     232  case '3':
     233    InvfLevel = 3;
     234    break;
     235  }
     236}
     237
     238/* -W arg to mg_passes */
     239void set_make_weights(int make_w) {
     240  MakeWeights = make_w;
     241}
     242
     243/* -M arg to mg_passes */
     244void set_max_numeric(int max_numeric) {
     245  SetEnv ("maxnumeric", max_numeric, NULL);
     246}
     247
     248/* -a, -s args to mg_passes */
     249void set_stem_options(char * stemmer, int method) {
     250  stemmer_num = stemmernumber (stemmer);
     251  stem_method = method & STEMMER_MASK;
     252}
     253
     254/* -t arg to mg_passes */
     255void set_trace_point(int tracepos) {
     256  trace = (unsigned long) (tracepos * 1024 * 1024);
     257}
     258
     259/* -n arg to mg_passes */
     260void set_trace_file(char * filen) {
     261  int len = strlen(filen);
     262  if (trace_name) {
     263    Xfree (trace_name);
     264    trace_name = NULL;
     265  }
     266  trace_name = Xstrdup (filen);
     267}
     268
     269/* ############################################### */
     270/* The old driver method has been split into 3:
     271init_driver, process_document (called numdocs times),
     272finalise_driver.
     273The above set vars methods should all be called before init_driver.
     274*/
     275
     276
    295277ProgTime StartTime, InitTime, ProcTime, DoneTime;
    296278
     
    298280init_driver ()
    299281{
     282  if (!filename || *filename == '\0')
     283    FatalError (1, "A document collection name must be specified.");
     284
     285  if ((Passes & (IVF_PASS_1 | IVF_PASS_2)) == (IVF_PASS_1 | IVF_PASS_2))
     286    FatalError (1, "I1 and I2 cannot be done simultaneously.");
     287
     288  if ((Passes & (TEXT_PASS_1 | TEXT_PASS_2)) == (TEXT_PASS_1 | TEXT_PASS_2))
     289    FatalError (1, "T1 and T2 cannot be done simultaneously.");
     290
     291  if (!Passes)
     292    FatalError (1, "S, T1, T2, I1 or I2 must be specified.");
     293
     294  if (trace)
     295    {
     296      if (!trace_name)
     297    trace_name = make_name (filename, TRACE_SUFFIX, NULL);
     298      if (!(Trace = fopen (trace_name, "a")))
     299    Message ("Unable to open \"%s\". No tracing will be done.", trace_name);
     300      else
     301    setbuf (Trace, NULL);
     302    }
     303  else
     304    Trace = NULL;
     305
     306  if (comp_stat_point)
     307    {
     308      char *name = make_name (filename, COMPRESSION_STATS_SUFFIX, NULL);
     309      if (!(Comp_Stats = fopen (name, "wb")))  /* [RPAP - Feb 97: WIN32 Port] */
     310    Message ("Unable to open \"%s\". No comp. stats. will be generated.",
     311         name);
     312    }
     313
     314  if (Trace)
     315    {
     316      int i;
     317      fprintf (Trace, "\n\n\t\t-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n\n");
     318      /* print out the args to mg_passes */
     319      fprintf (Trace, "\n\n");
     320    }
     321
    300322  int pass;
    301323
     
    338360  bytes_processed += len;
    339361
    340   printf("process doc, len=%d\n",len);
    341362#ifndef QUIET
    342363  if (!len)
     
    501522   (double) bytes_processed / (ProcTime.CPUTime - InitTime.CPUTime) / 1024);
    502523  //free (buffer);
    503 }
    504 
    505 
    506 
    507 int main (int argc, char **argv)
    508 {
    509   int ch, in_fd;
    510  
    511   msg_prefix = argv[0];
    512 
    513   opterr = 0;
    514   while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1)
    515     {
    516       switch (ch)
    517     {
    518     case 'G':
    519       SkipSGML = 1;
    520       break;
    521     case 'S':
    522       Passes |= SPECIAL;
    523       break;
    524     case '1':
    525       InvfLevel = 1;
    526       break;
    527     case '2':
    528       InvfLevel = 2;
    529       break;
    530     case '3':
    531       InvfLevel = 3;
    532       break;
    533     case 'f':
    534       filename = optarg;
    535       break;
    536     case 'n':
    537       trace_name = optarg;
    538       break;
    539     case 'D':
    540       Dump = 1;
    541       break;
    542     case 'W':
    543       MakeWeights = 1;
    544       break;
    545     case 'd':
    546       set_basepath (optarg);
    547       break;
    548     case 'a':
    549       stemmer_num = stemmernumber (optarg);
    550       break;
    551     case 's':
    552       stem_method = atoi (optarg) & STEMMER_MASK;
    553       break;
    554     case 'b':
    555       buf_size = atoi (optarg) * 1024;
    556       break;
    557     case 'C':
    558       comp_stat_point = atoi (optarg) * 1024;
    559       break;
    560     case 'c':
    561       ChunkLimit = atoi (optarg);
    562       break;
    563     case 'm':
    564       invf_buffer_size = (int) (atof (optarg) * 1024 * 1024);
    565       break;
    566     case 'I':
    567     case 'N': /* N kept for compatibility */
    568       if (*optarg == '1')
    569         Passes |= IVF_PASS_1;
    570       else if (*optarg == '2')
    571         Passes |= IVF_PASS_2;
    572       else
    573         usage ("Invalid pass number");
    574       break;
    575     case 'T':
    576       if (*optarg == '1')
    577         Passes |= TEXT_PASS_1;
    578       else if (*optarg == '2')
    579         Passes |= TEXT_PASS_2;
    580       else
    581         usage ("Invalid pass number");
    582       break;
    583     case 't':
    584       trace = (unsigned long) (atof (optarg) * 1024 * 1024);
    585       break;
    586     case 'M':
    587       SetEnv ("maxnumeric", optarg, NULL);
    588       break;
    589     case 'h':
    590     case '?':
    591       usage (NULL);
    592     }
    593     }
    594 
    595   if (!filename || *filename == '\0')
    596     FatalError (1, "A document collection name must be specified.");
    597 
    598   if (buf_size < MIN_BUF)
    599     FatalError (1, "The buffer size must exceed 1024 bytes.");
    600 
    601   if ((Passes & (IVF_PASS_1 | IVF_PASS_2)) == (IVF_PASS_1 | IVF_PASS_2))
    602     FatalError (1, "I1 and I2 cannot be done simultaneously.");
    603 
    604   if ((Passes & (TEXT_PASS_1 | TEXT_PASS_2)) == (TEXT_PASS_1 | TEXT_PASS_2))
    605     FatalError (1, "T1 and T2 cannot be done simultaneously.");
    606 
    607   if (!Passes)
    608     FatalError (1, "S, T1, T2, I1 or I2 must be specified.");
    609 
    610   if (optind < argc)
    611     {
    612       if ((in_fd = open (argv[optind], O_RDONLY)) == -1)
    613     FatalError (1, "Cannot open %s", argv[optind]);
    614       files = &argv[optind + 1];
    615       num_files = argc - (optind + 1);
    616     }
    617   else
    618     in_fd = 0;          /* stdin */
    619 
    620 
    621   if (trace)
    622     {
    623       if (!trace_name)
    624     trace_name = make_name (filename, TRACE_SUFFIX, NULL);
    625       if (!(Trace = fopen (trace_name, "a")))
    626     Message ("Unable to open \"%s\". No tracing will be done.", trace_name);
    627       else
    628     setbuf (Trace, NULL);
    629     }
    630   else
    631     Trace = NULL;
    632 
    633   if (comp_stat_point)
    634     {
    635       char *name = make_name (filename, COMPRESSION_STATS_SUFFIX, NULL);
    636       if (!(Comp_Stats = fopen (name, "wb")))  /* [RPAP - Feb 97: WIN32 Port] */
    637     Message ("Unable to open \"%s\". No comp. stats. will be generated.",
    638          name);
    639     }
    640 
    641 
    642   if (Trace)
    643     {
    644       int i;
    645       fprintf (Trace, "\n\n\t\t-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n\n");
    646       for (i = 0; i < argc; i++)
    647     fprintf (Trace, "%s ", argv[i]);
    648       fprintf (Trace, "\n\n");
    649     }
    650 
    651   init_driver ();
    652   /* here we have to do something to process docs from stdin */
    653   finalise_driver();
     524
    654525  if (Trace)
    655526    fclose (Trace);
     
    658529    fclose (Comp_Stats);
    659530
    660   return 0;
    661 }
     531}
     532
Note: See TracChangeset for help on using the changeset viewer.