[439] | 1 | /**************************************************************************
|
---|
| 2 | *
|
---|
| 3 | * mgquery.c -- The M G Q U E R Y program
|
---|
| 4 | * Copyright (C) 1994 Neil Sharman
|
---|
| 5 | *
|
---|
| 6 | * This program is free software; you can redistribute it and/or modify
|
---|
| 7 | * it under the terms of the GNU General Public License as published by
|
---|
| 8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 9 | * (at your option) any later version.
|
---|
| 10 | *
|
---|
| 11 | * This program is distributed in the hope that it will be useful,
|
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | * GNU General Public License for more details.
|
---|
| 15 | *
|
---|
| 16 | * You should have received a copy of the GNU General Public License
|
---|
| 17 | * along with this program; if not, write to the Free Software
|
---|
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 19 | *
|
---|
| 20 | * $Id: mgquery.c 439 1999-08-10 21:23:37Z sjboddie $
|
---|
| 21 | *
|
---|
| 22 | **************************************************************************/
|
---|
| 23 |
|
---|
| 24 | /*
|
---|
| 25 | $Log$
|
---|
| 26 | Revision 1.1 1999/08/10 21:18:18 sjboddie
|
---|
| 27 | renamed mg-1.3d directory mg
|
---|
| 28 |
|
---|
| 29 | Revision 1.3 1999/01/08 00:33:46 rjmcnab
|
---|
| 30 |
|
---|
| 31 | Enabled mg and the library software to read in more than one index
|
---|
| 32 | at a time.
|
---|
| 33 |
|
---|
| 34 | Revision 1.2 1998/11/25 07:55:49 rjmcnab
|
---|
| 35 |
|
---|
| 36 | Modified mg so that you can specify the stemmer you want
|
---|
| 37 | to use via a command line option. You specify it to
|
---|
| 38 | mg_passes during the build process. The number of the
|
---|
| 39 | stemmer that you used is stored within the inverted
|
---|
| 40 | dictionary header and the stemmed dictionary header so
|
---|
| 41 | the correct stemmer is used in later stages of building
|
---|
| 42 | and querying.
|
---|
| 43 |
|
---|
| 44 | Revision 1.1 1998/11/17 09:35:29 rjmcnab
|
---|
| 45 | *** empty log message ***
|
---|
| 46 |
|
---|
| 47 | * Revision 1.3 1994/10/20 03:57:02 tes
|
---|
| 48 | * I have rewritten the boolean query optimiser and abstracted out the
|
---|
| 49 | * components of the boolean query.
|
---|
| 50 | *
|
---|
| 51 | * Revision 1.2 1994/09/20 04:41:58 tes
|
---|
| 52 | * For version 1.1
|
---|
| 53 | *
|
---|
| 54 | */
|
---|
| 55 |
|
---|
| 56 | static char *RCSID = "$Id: mgquery.c 439 1999-08-10 21:23:37Z sjboddie $";
|
---|
| 57 |
|
---|
| 58 | #include "sysfuncs.h"
|
---|
| 59 |
|
---|
| 60 | #if defined(HAVE_SYS_PROCFS_H) && defined(HAVE_PR_BRKSIZE) && \
|
---|
| 61 | (__STDC__ == 0)
|
---|
| 62 | /* STDC test was included to allow cc -Xc on SunOS 5 to work */
|
---|
| 63 | #define USE_PROCESS_MEM
|
---|
| 64 | #endif
|
---|
| 65 |
|
---|
| 66 | #ifdef USE_PROCESS_MEM
|
---|
| 67 | # include <sys/procfs.h>
|
---|
| 68 | #endif
|
---|
| 69 |
|
---|
| 70 | #ifdef HAVE_GETRUSAGE
|
---|
| 71 | # ifdef HAVE_SYS_TIME_H
|
---|
| 72 | # include <sys/time.h>
|
---|
| 73 | # endif
|
---|
| 74 | # include <sys/resource.h>
|
---|
| 75 | #endif
|
---|
| 76 |
|
---|
| 77 | #ifndef HAVE_GETPAGESIZE
|
---|
| 78 | # include "getpagesize.h"
|
---|
| 79 | #endif
|
---|
| 80 |
|
---|
| 81 | #if WITH_REGEX
|
---|
| 82 | # include <regex.h>
|
---|
| 83 | #else
|
---|
| 84 | # include <rx.h>
|
---|
| 85 | #endif
|
---|
| 86 |
|
---|
| 87 |
|
---|
| 88 | #include <stdarg.h>
|
---|
| 89 | #include <signal.h>
|
---|
| 90 |
|
---|
| 91 | #include "messages.h"
|
---|
| 92 | #include "timing.h"
|
---|
| 93 | #include "memlib.h"
|
---|
| 94 | #include "local_strings.h" /* [RPAP - Feb 97: Term Frequency] */
|
---|
| 95 |
|
---|
| 96 | #include "filestats.h"
|
---|
| 97 | #include "invf.h"
|
---|
| 98 | #include "text.h"
|
---|
| 99 | #include "mg.h"
|
---|
| 100 | #include "lists.h"
|
---|
| 101 | #include "backend.h"
|
---|
| 102 | #include "environment.h"
|
---|
| 103 | #include "globals.h"
|
---|
| 104 | #include "read_line.h"
|
---|
| 105 | #include "mg_errors.h"
|
---|
| 106 | #include "commands.h"
|
---|
| 107 | #include "text_get.h"
|
---|
| 108 | #include "term_lists.h"
|
---|
| 109 | #include "query_term_list.h"
|
---|
| 110 |
|
---|
| 111 |
|
---|
/* Current output and input streams.  get_query() closes InFile and
   resets it to stdin when GetMultiLine() reports end of input. */
| 112 | FILE *OutFile = NULL, *InFile = NULL;
|
---|
/* Non-zero InPipe makes get_query() close InFile with pclose() instead of
   fclose().  OutPipe is presumably the matching flag for OutFile --
   TODO confirm against the code that opens OutFile. */
| 113 | int OutPipe = 0, InPipe = 0;
|
---|
/* When non-zero, get_query() stops looping on empty query lines. */
| 114 | int Quitting = 0;
|
---|
| 115 |
|
---|
| 116 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 117 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 118 | int GetDocNumFromParaNum(query_data *qd, int paranum) {
|
---|
| 119 | int Documents = qd->td->cth.num_of_docs;
|
---|
| 120 | int *Paragraph = qd->paragraph;
|
---|
| 121 | int low = 1, high = Documents;
|
---|
| 122 | int mid = (low+high)/2;
|
---|
| 123 |
|
---|
| 124 | while ((mid = (low+high)/2) >=1 && mid <= Documents)
|
---|
| 125 | {
|
---|
| 126 | if (paranum > Paragraph[mid])
|
---|
| 127 | low = mid+1;
|
---|
| 128 | else if (paranum <= Paragraph[mid-1])
|
---|
| 129 | high = mid-1;
|
---|
| 130 | else
|
---|
| 131 | return mid;
|
---|
| 132 | }
|
---|
| 133 | FatalError(1, "Bad paragraph number.\n");
|
---|
| 134 | }
|
---|
| 135 | #endif
|
---|
| 136 |
|
---|
| 137 | #ifdef TREC_MODE
|
---|
/* Table of document identifiers; ProcessDocs() prints the entry for
   document index DN from &trec_ids[DN * 14] with a "%-14.14s" format. */
| 138 | char *trec_ids = NULL;
|
---|
/* Optional map used by ProcessDocs(): indexed by (document number - 1) to
   obtain the index into trec_ids.  When NULL the index is used directly. */
| 139 | long *trec_paras = NULL;
|
---|
| 140 | #endif
|
---|
| 141 |
|
---|
/* Cleared by SIGPIPE_handler(); ProcessDocs() only writes output and keeps
   iterating while this is non-zero. */
| 142 | static volatile int PagerRunning = 0;
|
---|
/* Set to 1 by SIGINT_handler(); ProcessDocs() stops fetching further
   documents once it is set. */
| 143 | static volatile int Ctrl_C = 0;
|
---|
| 144 |
|
---|
| 145 |
|
---|
| 146 | /*****************************************************************************/
|
---|
| 147 |
|
---|
/* Category of a statistics line.  The numeric value is used by
   Display_Stats() as an index into its table of row labels. */
| 148 | typedef enum
|
---|
| 149 | {
|
---|
| 150 | S_Time, S_Mem, S_Size, S_File
|
---|
| 151 | }
|
---|
| 152 | S_Type;
|
---|
| 153 |
|
---|
/* One recorded statistic: its category, its label and the already
   formatted value.  `name' and `text' are copies made by Add_Stats() and
   released by Clear_Stats().  Stats is the growable array of entries. */
| 154 | static struct Stat
|
---|
| 155 | {
|
---|
| 156 | S_Type typ;
|
---|
| 157 | char *name;
|
---|
| 158 | char *text;
|
---|
| 159 | }
|
---|
| 160 | *Stats = NULL;
|
---|
/* Number of valid entries in Stats. */
| 161 | static int NumStats = 0;
|
---|
| 162 |
|
---|
| 163 | static void
|
---|
| 164 | Clear_Stats (void)
|
---|
| 165 | {
|
---|
| 166 | if (Stats)
|
---|
| 167 | {
|
---|
| 168 | int i;
|
---|
| 169 | for (i = 0; i < NumStats; i++)
|
---|
| 170 | {
|
---|
| 171 | if (Stats[i].name)
|
---|
| 172 | Xfree (Stats[i].name);
|
---|
| 173 | if (Stats[i].text)
|
---|
| 174 | Xfree (Stats[i].text);
|
---|
| 175 | }
|
---|
| 176 | Xfree (Stats);
|
---|
| 177 | Stats = NULL;
|
---|
| 178 | NumStats = 0;
|
---|
| 179 | }
|
---|
| 180 | }
|
---|
| 181 |
|
---|
| 182 | static void
|
---|
| 183 | Add_Stats (S_Type typ, char *name, char *fmt,...)
|
---|
| 184 | {
|
---|
| 185 | char buf[1024];
|
---|
| 186 | va_list args;
|
---|
| 187 | va_start (args, fmt);
|
---|
| 188 | vsprintf (buf, fmt, args);
|
---|
| 189 | if (Stats)
|
---|
| 190 | Stats = Xrealloc (Stats, (++NumStats) * sizeof (*Stats));
|
---|
| 191 | else
|
---|
| 192 | Stats = Xmalloc ((++NumStats) * sizeof (*Stats));
|
---|
| 193 | Stats[NumStats - 1].typ = typ;
|
---|
| 194 | Stats[NumStats - 1].name = Xstrdup (name);
|
---|
| 195 | Stats[NumStats - 1].text = Xstrdup (buf);
|
---|
| 196 | }
|
---|
| 197 |
|
---|
| 198 | static void
|
---|
| 199 | Display_Stats (FILE * f)
|
---|
| 200 | {
|
---|
| 201 | static char *sep = "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
|
---|
| 202 | "-=-=-=-=-=-=-=-=-=-=-";
|
---|
| 203 | char *names[] =
|
---|
| 204 | {"Time: ", "Memory:", "Sizes: ", "Disk: ", " "};
|
---|
| 205 | int i, last_typ = -1;
|
---|
| 206 | size_t len = 0;
|
---|
| 207 | if (NumStats == 0)
|
---|
| 208 | return;
|
---|
| 209 | fprintf (f, "%s\n", sep);
|
---|
| 210 | for (i = 0; i < NumStats; i++)
|
---|
| 211 | if (strlen (Stats[i].name) > len)
|
---|
| 212 | len = strlen (Stats[i].name);
|
---|
| 213 | for (i = 0; i < NumStats; i++)
|
---|
| 214 | {
|
---|
| 215 | int typ = 4;
|
---|
| 216 | if (Stats[i].typ != last_typ)
|
---|
| 217 | typ = last_typ = Stats[i].typ;
|
---|
| 218 | fprintf (f, "%s %-*s %s\n", names[typ], (int) len, Stats[i].name, Stats[i].text);
|
---|
| 219 | }
|
---|
| 220 | fprintf (f, "%s\n", sep);
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 | /*****************************************************************************/
|
---|
| 224 |
|
---|
| 225 |
|
---|
| 226 | static void
|
---|
| 227 | QueryTimeStats (ProgTime * Start, ProgTime * invf, ProgTime * text)
|
---|
| 228 | {
|
---|
| 229 | if (!BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 230 | {
|
---|
| 231 | Add_Stats (S_Time, "invf", ElapsedTime (Start, invf));
|
---|
| 232 | Add_Stats (S_Time, "text", ElapsedTime (invf, text));
|
---|
| 233 | }
|
---|
| 234 | Add_Stats (S_Time, "total", ElapsedTime (Start, text));
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | static void
|
---|
| 238 | StartUpTimeStats (InitQueryTimes * iqt)
|
---|
| 239 | {
|
---|
| 240 | if (!BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 241 | {
|
---|
| 242 | Add_Stats (S_Time, "dict [stem]", ElapsedTime (&iqt->Start,
|
---|
| 243 | &iqt->StemDict));
|
---|
| 244 | Add_Stats (S_Time, "weights", ElapsedTime (&iqt->StemDict,
|
---|
| 245 | &iqt->ApproxWeights));
|
---|
| 246 | Add_Stats (S_Time, "dict [text]", ElapsedTime (&iqt->ApproxWeights,
|
---|
| 247 | &iqt->CompDict));
|
---|
| 248 | Add_Stats (S_Time, "Inverted", ElapsedTime (&iqt->CompDict,
|
---|
| 249 | &iqt->Invf));
|
---|
| 250 | Add_Stats (S_Time, "Compressed", ElapsedTime (&iqt->Invf,
|
---|
| 251 | &iqt->Text));
|
---|
| 252 | }
|
---|
| 253 | Add_Stats (S_Time, "total", ElapsedTime (&iqt->Start, &iqt->Text));
|
---|
| 254 | }
|
---|
| 255 |
|
---|
| 256 |
|
---|
| 257 |
|
---|
| 258 |
|
---|
| 259 | #ifdef USE_PROCESS_MEM
|
---|
| 260 | static u_long
|
---|
| 261 | process_mem (void)
|
---|
| 262 | {
|
---|
| 263 | prstatus_t pr;
|
---|
| 264 | static int fd = -1;
|
---|
| 265 | if (fd == -1)
|
---|
| 266 | {
|
---|
| 267 | char buf[128];
|
---|
| 268 | sprintf (buf, "/proc/%ld", (long) getpid ());
|
---|
| 269 | fd = open (buf, O_RDONLY);
|
---|
| 270 | }
|
---|
| 271 | if (fd == -1 || ioctl (fd, PIOCSTATUS, &pr) == -1)
|
---|
| 272 | return 0;
|
---|
| 273 | return pr.pr_brksize;
|
---|
| 274 | }
|
---|
| 275 | #endif
|
---|
| 276 |
|
---|
| 277 |
|
---|
| 278 |
|
---|
| 279 |
|
---|
| 280 | static void
|
---|
| 281 | MemStats (query_data * qd)
|
---|
| 282 | {
|
---|
| 283 | if (!BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 284 | {
|
---|
| 285 | #ifdef HAVE_GETRUSAGE
|
---|
| 286 | struct rusage rusage;
|
---|
| 287 | getrusage (RUSAGE_SELF, &rusage);
|
---|
| 288 |
|
---|
| 289 | Add_Stats (S_Mem, "process mem", "%7.3f Mb",
|
---|
| 290 | (double) (rusage.ru_maxrss * getpagesize () / 1024.0 / 1024.0));
|
---|
| 291 | #endif
|
---|
| 292 | #ifdef USE_PROCESS_MEM
|
---|
| 293 | Add_Stats (S_Mem, "process mem", "%7.3f Mb",
|
---|
| 294 | (double) (process_mem () / 1024.0 / 1024.0));
|
---|
| 295 | #endif
|
---|
| 296 | Add_Stats (S_Mem, "dict [stem]", "%7.1f kB",
|
---|
| 297 | (double) qd->sd->MemForStemDict / 1024);
|
---|
| 298 | Add_Stats (S_Mem, "dict [text]", "%7.1f kB",
|
---|
| 299 | (double) qd->cd->MemForCompDict / 1024);
|
---|
| 300 | if (qd->awd)
|
---|
| 301 | Add_Stats (S_Mem, "weights", "%7.1f kB",
|
---|
| 302 | (double) qd->awd->MemForWeights / 1024);
|
---|
| 303 | }
|
---|
| 304 | if (qd->awd)
|
---|
| 305 | Add_Stats (S_Mem, "total [peak]", "%7.1f kB",
|
---|
| 306 | (double) (qd->max_mem_in_use + qd->sd->MemForStemDict +
|
---|
| 307 | qd->cd->MemForCompDict + qd->awd->MemForWeights) / 1024);
|
---|
| 308 | else
|
---|
| 309 | Add_Stats (S_Mem, "total [peak]", "%7.1f kB",
|
---|
| 310 | (double) (qd->max_mem_in_use + qd->sd->MemForStemDict +
|
---|
| 311 | qd->cd->MemForCompDict) / 1024);
|
---|
| 312 |
|
---|
| 313 | }
|
---|
| 314 |
|
---|
| 315 |
|
---|
| 316 |
|
---|
/*
 * SizeStats ()
 * Records the per-query counters stored in `qd' (skips, pointers,
 * accumulators, terms, answers and text index lookups) as S_Size
 * statistics.  Each counter is formatted with "%7d".
 */
| 317 | static void
|
---|
| 318 | SizeStats (query_data * qd)
|
---|
| 319 | {
|
---|
| 320 | Add_Stats (S_Size, "skips", "%7d", qd->hops_taken);
|
---|
| 321 | Add_Stats (S_Size, "pointers", "%7d", qd->num_of_ptrs);
|
---|
| 322 | Add_Stats (S_Size, "accumulators", "%7d", qd->num_of_accum);
|
---|
| 323 | Add_Stats (S_Size, "terms", "%7d", qd->num_of_terms);
|
---|
| 324 | Add_Stats (S_Size, "answers", "%7d", qd->num_of_ans);
|
---|
| 325 | Add_Stats (S_Size, "index lookups", "%7d", qd->text_idx_lookups);
|
---|
| 326 | }
|
---|
| 327 |
|
---|
/*
 * TotalSizeStats ()
 * Same rows as SizeStats(), but filled from the tot_* counters of `qd'.
 * NOTE(review): the tot_* fields are presumably running totals over all
 * queries of the session -- confirm where they are accumulated.
 */
| 328 | static void
|
---|
| 329 | TotalSizeStats (query_data * qd)
|
---|
| 330 | {
|
---|
| 331 | Add_Stats (S_Size, "skips", "%7d", qd->tot_hops_taken);
|
---|
| 332 | Add_Stats (S_Size, "pointers", "%7d", qd->tot_num_of_ptrs);
|
---|
| 333 | Add_Stats (S_Size, "accumulators", "%7d", qd->tot_num_of_accum);
|
---|
| 334 | Add_Stats (S_Size, "terms", "%7d", qd->tot_num_of_terms);
|
---|
| 335 | Add_Stats (S_Size, "answers", "%7d", qd->tot_num_of_ans);
|
---|
| 336 | Add_Stats (S_Size, "index lookups", "%7d", qd->tot_text_idx_lookups);
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 |
|
---|
| 340 | static void
|
---|
| 341 | StatFile (File * F)
|
---|
| 342 | {
|
---|
| 343 | static unsigned long NumBytes = 0, NumSeeks = 0, NumReads = 0;
|
---|
| 344 | if (F)
|
---|
| 345 | if ((int) F != -1)
|
---|
| 346 | {
|
---|
| 347 | if (!BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 348 | Add_Stats (S_File, F->name, "%7.1f kB (%3d seeks, %7d reads)",
|
---|
| 349 | (double) F->Current.NumBytes / 1024, F->Current.NumSeeks,
|
---|
| 350 | F->Current.NumReads);
|
---|
| 351 | NumBytes += F->Current.NumBytes;
|
---|
| 352 | NumSeeks += F->Current.NumSeeks;
|
---|
| 353 | NumReads += F->Current.NumReads;
|
---|
| 354 | }
|
---|
| 355 | else
|
---|
| 356 | {
|
---|
| 357 | Add_Stats (S_File, "total", "%7.1f kB (%3d seeks, %7d reads)",
|
---|
| 358 | (double) NumBytes / 1024, NumSeeks, NumReads);
|
---|
| 359 | NumSeeks = NumReads = NumBytes = 0;
|
---|
| 360 | }
|
---|
| 361 |
|
---|
| 362 | }
|
---|
| 363 |
|
---|
| 364 |
|
---|
| 365 | static void
|
---|
| 366 | File_Stats (query_data * qd)
|
---|
| 367 | {
|
---|
| 368 | StatFile (qd->File_comp_dict);
|
---|
| 369 | StatFile (qd->File_fast_comp_dict);
|
---|
| 370 | StatFile (qd->File_text_idx_wgt);
|
---|
| 371 | StatFile (qd->File_text);
|
---|
| 372 | StatFile (qd->File_stem);
|
---|
| 373 |
|
---|
| 374 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 375 | if (qd->sd->sdh.indexed)
|
---|
| 376 | {
|
---|
| 377 | StatFile (qd->File_stem1);
|
---|
| 378 | StatFile (qd->File_stem2);
|
---|
| 379 | StatFile (qd->File_stem3);
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 | StatFile (qd->File_invf);
|
---|
| 383 | StatFile (qd->File_weight_approx);
|
---|
| 384 | StatFile (qd->File_text_idx);
|
---|
| 385 | StatFile ((File *) (-1));
|
---|
| 386 | }
|
---|
| 387 |
|
---|
| 388 |
|
---|
| 389 | char *
|
---|
| 390 | get_query (query_data * qd)
|
---|
| 391 | {
|
---|
| 392 | char *line, *LinePtr;
|
---|
| 393 | WritePrompt ();
|
---|
| 394 | do
|
---|
| 395 | {
|
---|
| 396 | do
|
---|
| 397 | {
|
---|
| 398 | line = GetMultiLine ();
|
---|
| 399 | if (line == NULL)
|
---|
| 400 | {
|
---|
| 401 | if (stdin == InFile)
|
---|
| 402 | return (NULL); /* EOF */
|
---|
| 403 | if (InPipe)
|
---|
| 404 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 405 | #ifdef __WIN32__
|
---|
| 406 | _pclose (InFile);
|
---|
| 407 | #else
|
---|
| 408 | pclose (InFile);
|
---|
| 409 | #endif
|
---|
| 410 | else
|
---|
| 411 | fclose (InFile);
|
---|
| 412 | InPipe = 0;
|
---|
| 413 | InFile = stdin;
|
---|
| 414 | }
|
---|
| 415 | }
|
---|
| 416 | while (line == NULL);
|
---|
| 417 | LinePtr = ProcessCommands (line, qd);
|
---|
| 418 | if (CommandsErrorStr)
|
---|
| 419 | fprintf (stderr, "%s\n", CommandsErrorStr);
|
---|
| 420 | }
|
---|
| 421 | while (*LinePtr == '\0' && !Quitting);
|
---|
| 422 | return (LinePtr);
|
---|
| 423 | }
|
---|
| 424 |
|
---|
| 425 |
|
---|
| 426 | /* This is executed when a SIGPIPE is detected,
|
---|
| 427 | i.e. if someone quits out of the PAGER, this is executed */
|
---|
/* SIGPIPE_handler: switches the signal to "ignore" and clears
   PagerRunning.  The signature depends on HAVE_SIGCONTEXT; the extra
   parameters of the four-argument form are not used by the body. */
| 428 | #ifdef HAVE_SIGCONTEXT
|
---|
| 429 | static RETSIGTYPE
|
---|
| 430 | SIGPIPE_handler (int sig, int code,
|
---|
| 431 | struct sigcontext *scp, char *addr)
|
---|
| 432 | #else
|
---|
| 433 | static RETSIGTYPE
|
---|
| 434 | SIGPIPE_handler (int sig)
|
---|
| 435 | #endif
|
---|
| 436 | {
|
---|
| 437 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
/* The WIN32 build ignores whichever signal was delivered instead of naming
   SIGPIPE -- presumably because SIGPIPE is not defined there; confirm. */
| 438 | #ifdef __WIN32__
|
---|
| 439 | signal (sig, SIG_IGN);
|
---|
| 440 | #else
|
---|
| 441 | signal (SIGPIPE, SIG_IGN);
|
---|
| 442 | #endif
|
---|
/* Tells ProcessDocs() to stop writing and to leave its document loop. */
| 443 | PagerRunning = 0;
|
---|
| 444 | }
|
---|
| 445 |
|
---|
| 446 | /* This is executed when a SIGINT (i.e. CTRL-C) is detected */
|
---|
/* SIGINT_handler: only sets the Ctrl_C flag; the flag is tested by the
   document loop in ProcessDocs().  The signature depends on
   HAVE_SIGCONTEXT; none of the parameters is used by the body. */
| 447 | #ifdef HAVE_SIGCONTEXT
|
---|
| 448 | static RETSIGTYPE
|
---|
| 449 | SIGINT_handler (int sig, int code,
|
---|
| 450 | struct sigcontext *scp, char *addr)
|
---|
| 451 | #else
|
---|
| 452 | static RETSIGTYPE
|
---|
| 453 | SIGINT_handler (int sig)
|
---|
| 454 | #endif
|
---|
| 455 | {
|
---|
| 456 | Ctrl_C = 1;
|
---|
| 457 | }
|
---|
| 458 |
|
---|
| 459 |
|
---|
| 460 |
|
---|
/* Post-retrieval filter pattern extracted by GetPostProc() (heap copy);
   while it is NULL, PostProc() accepts every document. */
| 461 | static char *post_proc = NULL;
|
---|
| 462 |
|
---|
| 463 |
|
---|
| 464 |
|
---|
| 465 | void
|
---|
| 466 | GetPostProc (char *line)
|
---|
| 467 | {
|
---|
| 468 | char *start, *finish;
|
---|
| 469 | if (post_proc)
|
---|
| 470 | {
|
---|
| 471 | Xfree (post_proc);
|
---|
| 472 | post_proc = NULL;
|
---|
| 473 | }
|
---|
| 474 | start = strchr (line, '\"');
|
---|
| 475 | finish = strrchr (line, '\"');
|
---|
| 476 | if (start != finish)
|
---|
| 477 | {
|
---|
| 478 | /* found a pattern */
|
---|
| 479 | *finish = '\0';
|
---|
| 480 | post_proc = Xstrdup (start + 1);
|
---|
| 481 | strcpy (start, finish + 1);
|
---|
| 482 | if (BooleanEnv (GetEnv ("verbatim"), 1) == 0)
|
---|
| 483 | {
|
---|
| 484 | char *s;
|
---|
| 485 | s = re_comp (post_proc);
|
---|
| 486 | if (!s)
|
---|
| 487 | {
|
---|
| 488 | Xfree (post_proc);
|
---|
| 489 | post_proc = NULL;
|
---|
| 490 | }
|
---|
| 491 | }
|
---|
| 492 | }
|
---|
| 493 | else if (start != NULL)
|
---|
| 494 | {
|
---|
| 495 | /* found a single speech mark. Delete It. */
|
---|
| 496 | strcpy (start, start + 1);
|
---|
| 497 | }
|
---|
| 498 | }
|
---|
| 499 |
|
---|
| 500 | int
|
---|
| 501 | PostProc (char *UDoc, int verbatim)
|
---|
| 502 | {
|
---|
| 503 | if (!post_proc)
|
---|
| 504 | return 1;
|
---|
| 505 |
|
---|
| 506 | if (verbatim) {
|
---|
| 507 | return (strstr (UDoc, post_proc) != NULL);
|
---|
| 508 | }
|
---|
| 509 | return re_exec ((char *) UDoc);
|
---|
| 510 | }
|
---|
| 511 |
|
---|
| 512 |
|
---|
| 513 |
|
---|
| 514 | static DocEntry *
|
---|
| 515 | in_chain (int para, int ip, DocEntry * dc)
|
---|
| 516 | {
|
---|
| 517 | while (dc)
|
---|
| 518 | {
|
---|
| 519 | if (dc->DocNum - ip == para)
|
---|
| 520 | return dc;
|
---|
| 521 | dc = dc->Next;
|
---|
| 522 | }
|
---|
| 523 | return NULL;
|
---|
| 524 | }
|
---|
| 525 |
|
---|
| 526 | /* num should be greater than or equal to 1 */
|
---|
| 527 | int
|
---|
| 528 | RawDocOutput (query_data * qd, u_long num, FILE * Output)
|
---|
| 529 | {
|
---|
| 530 | static last_pos = 0;
|
---|
| 531 | static u_char *c_buffer = 0;
|
---|
| 532 | static int buf_len = -1;
|
---|
| 533 | static u_char *uc_buffer = 0;
|
---|
| 534 | u_long pos, len;
|
---|
| 535 | int ULen;
|
---|
| 536 |
|
---|
| 537 | FetchDocStart (qd, num, &pos, &len);
|
---|
| 538 |
|
---|
| 539 | if ((int) len > buf_len)
|
---|
| 540 | {
|
---|
| 541 | if (c_buffer)
|
---|
| 542 | {
|
---|
| 543 | Xfree (c_buffer);
|
---|
| 544 | Xfree (uc_buffer);
|
---|
| 545 | }
|
---|
| 546 | if (!(c_buffer = Xmalloc (len)))
|
---|
| 547 | return -1;
|
---|
| 548 | if (!(uc_buffer = Xmalloc ((int) (qd->td->cth.ratio * 1.01 *
|
---|
| 549 | len) + 100)))
|
---|
| 550 | return -1;
|
---|
| 551 | buf_len = len;
|
---|
| 552 | }
|
---|
| 553 | if (last_pos != pos)
|
---|
| 554 | Fseek (qd->td->TextFile, pos, 0);
|
---|
| 555 | Fread (c_buffer, 1, len, qd->td->TextFile);
|
---|
| 556 | last_pos = pos + len;
|
---|
| 557 | DecodeText (qd->cd, c_buffer, len, uc_buffer, &ULen);
|
---|
| 558 | fwrite (uc_buffer, ULen, sizeof (u_char), Output);
|
---|
| 559 | return 0;
|
---|
| 560 | }
|
---|
| 561 |
|
---|
| 562 |
|
---|
/*
 * StringOut ()
 * Writes `string' to `Output', expanding three two-character escapes:
 *
 *   %n   intval as an unsigned decimal number, if intvalid is non-zero
 *   %w   floatval in "%f" notation, if floatvalid is non-zero
 *   %%   a single '%'
 *
 * An escape whose value is not valid is written unchanged ("%n", "%w").
 * A '%' followed by any other character, or at the end of the string, is
 * written as it stands.
 */
void
StringOut (FILE * Output, char *string,
           int intvalid, unsigned long intval,
           int floatvalid, double floatval)
{
  char *p = string;

  while (*p != '\0')
    {
      char code = (*p == '%') ? p[1] : '\0';

      if (code == 'n')
        {
          if (intvalid)
            fprintf (Output, "%lu", intval);
          else
            fprintf (Output, "%%n");
          p += 2;
        }
      else if (code == 'w')
        {
          if (floatvalid)
            fprintf (Output, "%f", floatval);
          else
            fprintf (Output, "%%w");
          p += 2;
        }
      else if (code == '%')
        {
          fputc ('%', Output);
          p += 2;
        }
      else
        {
          fputc (*p, Output);
          p++;
        }
    }
}
|
---|
| 595 |
|
---|
| 596 |
|
---|
/*
 * HeaderOut ()
 * Writes a one-line summary of the document text to `Output'.
 *
 *   UDoc          document text
 *   ULen          number of bytes in UDoc
 *   heads_length  maximum number of characters to write
 *
 * Runs of white space and of the control bytes \01 and \03 are collapsed
 * into a single blank, and leading ones are dropped.  Output stops at the
 * first \02 byte, at the end of the text, or once heads_length characters
 * have been written.
 */
void
HeaderOut (FILE * Output, u_char * UDoc, unsigned long ULen, int heads_length)
{
  unsigned long i;              /* same type as ULen: no mixed-sign compare */
  int space = 1, num = 0;

  for (i = 0; i < ULen && num < heads_length; i++)
    {
      /* Keep the byte unsigned: isspace() is undefined for negative
         values other than EOF, which a plain char can produce for
         bytes >= 0x80. */
      u_char c = UDoc[i];

      if (c == '\02')
        break;

      if (isspace (c) || c == '\01' || c == '\03')
        {
          if (!space)
            {
              fputc (' ', Output);
              num++;
            }
          space = 1;
        }
      else
        {
          space = 0;
          fputc (c, Output);
          num++;
        }
    }
}
|
---|
| 624 |
|
---|
| 625 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 626 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
/*
 * PrintDocNum ()
 * Writes one "docnum.indexnum" line to `output'.  For query types 'R' and
 * 'A' the weight is appended with four decimals; for every other query
 * type the weight is left out.
 */
void
PrintDocNum (FILE *output, char query_type,
             int docnum, int indexnum, float weight)
{
  if (query_type != 'R' && query_type != 'A')
    {
      fprintf (output, "%7d.%-7d\n", docnum, indexnum);
      return;
    }
  fprintf (output, "%7d.%-7d %6.4f\n", docnum, indexnum, weight);
}
|
---|
| 635 | #endif
|
---|
| 636 |
|
---|
| 637 | static int
|
---|
| 638 | ProcessDocs (query_data * qd, int num, int verbatim,
|
---|
| 639 | char OutputType, FILE * Output)
|
---|
| 640 | {
|
---|
| 641 | int max_buf = 0;
|
---|
| 642 | int DocCount = 0;
|
---|
| 643 | char *doc_sepstr = NULL;
|
---|
| 644 | char *para_sepstr = NULL;
|
---|
| 645 | char *para_start = NULL;
|
---|
| 646 | int heads_length = atoi (GetDefEnv ("heads_length", "50"));
|
---|
| 647 | char QueryType = get_query_type ();
|
---|
| 648 | int need_text = (OutputType == OUTPUT_TEXT || OutputType == OUTPUT_HILITE ||
|
---|
| 649 | OutputType == OUTPUT_HEADERS || OutputType == OUTPUT_SILENT ||
|
---|
| 650 | post_proc); /* [RJM June 1997 -- fixing post retrieval scan] */
|
---|
| 651 |
|
---|
| 652 | if (OutputType == OUTPUT_TEXT || OutputType == OUTPUT_HILITE)
|
---|
| 653 | {
|
---|
| 654 | if (QueryType == QUERY_APPROX || QueryType == QUERY_RANKED)
|
---|
| 655 | {
|
---|
| 656 | doc_sepstr = de_escape_string (
|
---|
| 657 | Xstrdup (GetDefEnv ("ranked_doc_sepstr",
|
---|
| 658 | "---------------------------------- %n %w\\n")));
|
---|
| 659 | }
|
---|
| 660 | else
|
---|
| 661 | {
|
---|
| 662 | doc_sepstr = de_escape_string (
|
---|
| 663 | Xstrdup (GetDefEnv ("doc_sepstr",
|
---|
| 664 | "---------------------------------- %n\\n")));
|
---|
| 665 | }
|
---|
| 666 | para_sepstr = de_escape_string (
|
---|
| 667 | Xstrdup (GetDefEnv ("para_sepstr",
|
---|
| 668 | "\\n######## PARAGRAPH %n ########\\n")));
|
---|
| 669 |
|
---|
| 670 | para_start = de_escape_string (
|
---|
| 671 | Xstrdup (GetDefEnv ("para_start",
|
---|
| 672 | "***** Weight = %w *****\\n")));
|
---|
| 673 | }
|
---|
| 674 |
|
---|
| 675 | if (need_text)
|
---|
| 676 | {
|
---|
| 677 | max_buf = atoi (GetDefEnv ("buffer", "1048576"));
|
---|
| 678 | }
|
---|
| 679 |
|
---|
| 680 | do
|
---|
| 681 | {
|
---|
| 682 | u_char *UDoc = NULL;
|
---|
| 683 | unsigned long ULen;
|
---|
| 684 |
|
---|
| 685 | if (need_text)
|
---|
| 686 | {
|
---|
| 687 | /* load the compressed text */
|
---|
| 688 | if (LoadCompressedText (qd, max_buf))
|
---|
| 689 | {
|
---|
| 690 | Message ("Unable to load compressed text.");
|
---|
| 691 | FatalError (1, "This is probably due to lack of memory.");
|
---|
| 692 | }
|
---|
| 693 |
|
---|
| 694 | /* uncompress the loaded text */
|
---|
| 695 | UDoc = GetDocText (qd, &ULen);
|
---|
| 696 | if (UDoc == NULL)
|
---|
| 697 | FatalError (1, "UDoc is unexpectedly NULL");
|
---|
| 698 | }
|
---|
| 699 |
|
---|
| 700 | if (!UDoc || PostProc ((char *) UDoc, verbatim))
|
---|
| 701 | {
|
---|
| 702 | switch (OutputType)
|
---|
| 703 | {
|
---|
| 704 | case OUTPUT_COUNT:
|
---|
| 705 | case OUTPUT_SILENT:
|
---|
| 706 | break;
|
---|
| 707 | case OUTPUT_DOCNUMS: /* This prints out the docnums string */
|
---|
| 708 | if (PagerRunning)
|
---|
| 709 | {
|
---|
| 710 |
|
---|
| 711 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 712 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 713 | int doc_num = GetDocNum(qd);
|
---|
| 714 |
|
---|
| 715 | if (qd->paragraph)
|
---|
| 716 | {
|
---|
| 717 | if (qd->id->ifh.InvfLevel == 3 &&
|
---|
| 718 | (QueryType == 'R' || QueryType == 'A'))
|
---|
| 719 | {
|
---|
| 720 | /* Print weights for each paragraph in document */
|
---|
| 721 |
|
---|
| 722 |
|
---|
| 723 | int true_doc_num = GetDocNumFromParaNum(qd, doc_num);
|
---|
| 724 |
|
---|
| 725 | /* Get number of paragraphs in this document */
|
---|
| 726 |
|
---|
| 727 | int num_paragraphs =
|
---|
| 728 | qd->paragraph[true_doc_num]-qd->paragraph[true_doc_num-1];
|
---|
| 729 |
|
---|
| 730 | int init_para = FetchInitialParagraph(qd->td,
|
---|
| 731 | doc_num);
|
---|
| 732 | DocEntry *de, *doc_chain = GetDocChain(qd);
|
---|
| 733 | int i;
|
---|
| 734 |
|
---|
| 735 | for (i = 0; i < num_paragraphs; i++)
|
---|
| 736 | {
|
---|
| 737 | if ((de = in_chain(i, init_para, doc_chain)))
|
---|
| 738 | PrintDocNum(Output, QueryType,
|
---|
| 739 | true_doc_num, init_para+i,
|
---|
| 740 | de->Weight);
|
---|
| 741 | }
|
---|
| 742 | }
|
---|
| 743 | else
|
---|
| 744 | PrintDocNum(Output, QueryType,
|
---|
| 745 | GetDocNumFromParaNum(qd, GetDocNum(qd)),
|
---|
| 746 | GetDocNum(qd),
|
---|
| 747 | GetDocWeight(qd));
|
---|
| 748 | }
|
---|
| 749 | else
|
---|
| 750 | {
|
---|
| 751 | PrintDocNum(Output, QueryType,
|
---|
| 752 | doc_num, doc_num, GetDocWeight(qd));
|
---|
| 753 | }
|
---|
| 754 | #else
|
---|
| 755 | fprintf (Output, "%7d %6.4f %7lu\n", GetDocNum (qd),
|
---|
| 756 | GetDocWeight (qd), GetDocCompLength (qd));
|
---|
| 757 | #endif
|
---|
| 758 | }
|
---|
| 759 | break;
|
---|
| 760 | case OUTPUT_HEADERS: /* This prints out the headers of the documents */
|
---|
| 761 | if (PagerRunning)
|
---|
| 762 | fprintf (Output, "%d ", GetDocNum (qd));
|
---|
| 763 | HeaderOut (Output, UDoc, ULen, heads_length);
|
---|
| 764 | if (PagerRunning)
|
---|
| 765 | fputc ('\n', Output);
|
---|
| 766 | break;
|
---|
| 767 | #if TREC_MODE
|
---|
| 768 | case OUTPUT_EXTRAS: /* This prints out the docnums string */
|
---|
| 769 | if (PagerRunning && trec_ids)
|
---|
| 770 | {
|
---|
| 771 | long DN, PN = GetDocNum (qd) - 1;
|
---|
| 772 | if (trec_paras)
|
---|
| 773 | DN = trec_paras[PN];
|
---|
| 774 | else
|
---|
| 775 | DN = PN;
|
---|
| 776 | fprintf (Output, "%-14.14s %8ld %10.5f\n",
|
---|
| 777 | &trec_ids[DN * 14], PN + 1, GetDocWeight (qd));
|
---|
| 778 | }
|
---|
| 779 | break;
|
---|
| 780 | #endif
|
---|
| 781 | case OUTPUT_TEXT:
|
---|
| 782 | case OUTPUT_HILITE:
|
---|
| 783 | {
|
---|
| 784 | int j, para = -1, curr_para = 0;
|
---|
| 785 | int init_para = -1;
|
---|
| 786 | DocEntry *de, *doc_chain = NULL;
|
---|
| 787 | register char ch = ' ';
|
---|
| 788 | register char lch = '\n';
|
---|
| 789 |
|
---|
| 790 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 791 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 792 | if (qd->id->ifh.InvfLevel == 3)
|
---|
| 793 | {
|
---|
| 794 | init_para = FetchInitialParagraph(qd->td, GetDocNum(qd));
|
---|
| 795 |
|
---|
| 796 | StringOut(Output, para_sepstr,
|
---|
| 797 | 1, init_para+curr_para,
|
---|
| 798 | 0, 0);
|
---|
| 799 |
|
---|
| 800 | }
|
---|
| 801 | else
|
---|
| 802 | StringOut(Output, doc_sepstr,
|
---|
| 803 | 1, GetDocNum(qd),
|
---|
| 804 | QueryType == 'A' || QueryType == 'R',
|
---|
| 805 | GetDocWeight(qd));
|
---|
| 806 |
|
---|
| 807 | #else
|
---|
| 808 | int p_on = 0;
|
---|
| 809 |
|
---|
| 810 | if (PagerRunning)
|
---|
| 811 | {
|
---|
| 812 | StringOut (Output, doc_sepstr,
|
---|
| 813 | 1, GetDocNum (qd),
|
---|
| 814 | QueryType == 'A' || QueryType == 'R',
|
---|
| 815 | GetDocWeight (qd));
|
---|
| 816 | }
|
---|
| 817 | if (qd->id->ifh.InvfLevel == 3)
|
---|
| 818 | {
|
---|
| 819 | init_para = FetchInitialParagraph (qd->td, GetDocNum (qd));
|
---|
| 820 | doc_chain = GetDocChain (qd);
|
---|
| 821 | para = GetDocNum (qd) - init_para;
|
---|
| 822 |
|
---|
| 823 | StringOut (Output, para_sepstr,
|
---|
| 824 | 1, curr_para + 1,
|
---|
| 825 | 0, 0);
|
---|
| 826 |
|
---|
| 827 | if ((de = in_chain (0, init_para, doc_chain)))
|
---|
| 828 | StringOut (Output, para_start,
|
---|
| 829 | 0, 0,
|
---|
| 830 | 1, de->Weight);
|
---|
| 831 |
|
---|
| 832 | if (doc_chain->DocNum - init_para == 0)
|
---|
| 833 | p_on = 1;
|
---|
| 834 | }
|
---|
| 835 | #endif
|
---|
| 836 | for (j = 0; j < ULen; j++)
|
---|
| 837 | {
|
---|
| 838 | ch = UDoc[j];
|
---|
| 839 | switch (ch)
|
---|
| 840 | {
|
---|
| 841 | case '\02':
|
---|
| 842 | break;
|
---|
| 843 | case '\01':
|
---|
| 844 | ch = '\n';
|
---|
| 845 | case '\03':
|
---|
| 846 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 847 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 848 | /* print paragraph numbers only if this is
|
---|
| 849 | a level 3 index */
|
---|
| 850 | if (qd->id->ifh.InvfLevel == 3)
|
---|
| 851 | {
|
---|
| 852 | curr_para++;
|
---|
| 853 | StringOut(Output, para_sepstr,
|
---|
| 854 | 1, init_para+curr_para,
|
---|
| 855 | 0, 0);
|
---|
| 856 | }
|
---|
| 857 | #else
|
---|
| 858 | p_on = 0;
|
---|
| 859 | curr_para++;
|
---|
| 860 | StringOut (Output, para_sepstr,
|
---|
| 861 | 1, curr_para + 1,
|
---|
| 862 | 0, 0);
|
---|
| 863 | lch = *(strchr (para_sepstr, '\0') - 1);
|
---|
| 864 | if ((de = in_chain (curr_para, init_para, doc_chain)))
|
---|
| 865 | StringOut (Output, para_start,
|
---|
| 866 | 0, 0,
|
---|
| 867 | 1, de->Weight);
|
---|
| 868 | if (doc_chain &&
|
---|
| 869 | doc_chain->DocNum - init_para == curr_para)
|
---|
| 870 | p_on = 1;
|
---|
| 871 | #endif
|
---|
| 872 | break;
|
---|
| 873 | default:
|
---|
| 874 | {
|
---|
| 875 | if (PagerRunning)
|
---|
| 876 | {
|
---|
| 877 | fputc (ch, Output);
|
---|
| 878 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 879 | #if !defined(PARADOCNUM) && !defined(NZDL)
|
---|
| 880 | if (p_on && isprint (ch))
|
---|
| 881 | {
|
---|
| 882 | fputc ('\b', Output);
|
---|
| 883 | fputc ('_', Output);
|
---|
| 884 | }
|
---|
| 885 | #endif
|
---|
| 886 | }
|
---|
| 887 |
|
---|
| 888 | lch = ch;
|
---|
| 889 | }
|
---|
| 890 | }
|
---|
| 891 | }
|
---|
| 892 | if (PagerRunning && lch != '\n')
|
---|
| 893 | fputc ('\n', Output);
|
---|
| 894 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 895 | #if !defined(PARADOCNUM) && !defined(NZDL)
|
---|
| 896 | p_on = 0;
|
---|
| 897 | #endif
|
---|
| 898 | }
|
---|
| 899 | }
|
---|
| 900 | if (PagerRunning)
|
---|
| 901 | fflush (Output);
|
---|
| 902 |
|
---|
| 903 | DocCount++; /* moved within if statement [RJM June 1997 -- fixing post retrieval scan] */
|
---|
| 904 | }
|
---|
| 905 | }
|
---|
| 906 | while (NextDoc (qd) && PagerRunning && (!Ctrl_C));
|
---|
| 907 |
|
---|
| 908 | if (need_text)
|
---|
| 909 | {
|
---|
| 910 | FreeTextBuffer (qd);
|
---|
| 911 | }
|
---|
| 912 |
|
---|
| 913 | if (OutputType == OUTPUT_TEXT || OutputType == OUTPUT_HILITE)
|
---|
| 914 | {
|
---|
| 915 | Xfree (doc_sepstr);
|
---|
| 916 | Xfree (para_sepstr);
|
---|
| 917 | Xfree (para_start);
|
---|
| 918 | }
|
---|
| 919 |
|
---|
| 920 | return (DocCount);
|
---|
| 921 | }
|
---|
| 922 |
|
---|
| 923 |
|
---|
/* output_terminator () */
/* Writes the value of the "terminator" setting to `out'.  The setting is */
/* fetched with GetDefEnv using "" as the supplied default, duplicated, */
/* passed through de_escape_string (which works on the copy in place), */
/* written with fputs and then released.  Nothing is written when the */
/* copy could not be made. */
void
output_terminator (FILE * out)
{
  char *terminator = Xstrdup (GetDefEnv ("terminator", ""));

  /* Guard against a failed duplication: de_escape_string and fputs must
     not be handed a NULL string.  (The rest of this file checks Xmalloc
     results for NULL in the same way.) */
  if (!terminator)
    return;

  de_escape_string (terminator);
  fputs (terminator, out);
  Xfree (terminator);
}
| 932 |
|
---|
| 933 |
|
---|
| 934 |
|
---|
| 935 |
|
---|
| 936 | /* MoreDocs () */
|
---|
| 937 | /* Displays all documents in list DocList. */
|
---|
| 938 | /* Documents are fetched, then decompressed and displayed according to the */
|
---|
| 939 | /* format implied in FormString(). */
|
---|
| 940 |
|
---|
| 941 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 942 | #ifdef __WIN32__
|
---|
| 943 | # define HILITE_PAGER "mg_hilite_words.exe"
|
---|
| 944 | #else
|
---|
| 945 | # define HILITE_PAGER "mg_hilite_words"
|
---|
| 946 | #endif
|
---|
| 947 |
|
---|
| 948 | #define MAX_HILITE_PAGER_STR 80 /* for command & its options */
|
---|
| 949 |
|
---|
| 950 | static void
|
---|
| 951 | MoreDocs (query_data * qd, char *Query, char OutputType)
|
---|
| 952 | {
|
---|
| 953 | static char terms_str[MAXTERMSTRLEN + 1];
|
---|
| 954 | int DocCount = 0; /* number of actual matches */
|
---|
| 955 | FILE *Output = NULL;
|
---|
| 956 | int using_pipe = 0;
|
---|
| 957 | char *pager = NULL;
|
---|
| 958 |
|
---|
| 959 | Ctrl_C = 0;
|
---|
| 960 |
|
---|
| 961 | qd->num_of_ans = qd->DL->num;
|
---|
| 962 |
|
---|
| 963 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 964 | #ifndef __WIN32__
|
---|
| 965 | signal (SIGPIPE, SIGPIPE_handler);
|
---|
| 966 | #endif
|
---|
| 967 | signal (SIGINT, SIGINT_handler);
|
---|
| 968 |
|
---|
| 969 | PagerRunning = 1;
|
---|
| 970 | if (isatty (fileno (OutFile)) && GetEnv ("pager") &&
|
---|
| 971 | OutputType != OUTPUT_HILITE &&
|
---|
| 972 | OutputType != OUTPUT_SILENT && OutputType != OUTPUT_COUNT)
|
---|
| 973 | {
|
---|
| 974 | pager = GetEnv ("pager");
|
---|
| 975 | }
|
---|
| 976 | else if (isatty (fileno (OutFile)) && OutputType == OUTPUT_HILITE)
|
---|
| 977 | {
|
---|
| 978 | /* concat the pager and its word argument strings */
|
---|
| 979 | ConvertTermsToString (qd->TL, terms_str);
|
---|
| 980 | pager = Xmalloc (MAX_HILITE_PAGER_STR + strlen (terms_str) + 1);
|
---|
| 981 | if (!pager)
|
---|
| 982 | {
|
---|
| 983 | fprintf (stderr, "Unable to allocate memory for highlighting\n");
|
---|
| 984 | return;
|
---|
| 985 | }
|
---|
| 986 | sprintf (pager, "%s --style=%s --pager=%s --stem_method=%ld --stemmer=%ld %s",
|
---|
| 987 | HILITE_PAGER,
|
---|
| 988 | GetEnv ("hilite_style"),
|
---|
| 989 | GetEnv ("pager"),
|
---|
| 990 | qd->sd->sdh.stem_method,
|
---|
| 991 | qd->sd->sdh.stemmer_num,
|
---|
| 992 | terms_str);
|
---|
| 993 |
|
---|
| 994 | }
|
---|
| 995 | else
|
---|
| 996 | {
|
---|
| 997 | Output = OutFile;
|
---|
| 998 | }
|
---|
| 999 |
|
---|
| 1000 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 1001 | #if defined(OUTPUTSTEMMEDWORDS) || defined(NZDL)
|
---|
| 1002 | if (!isatty(fileno(OutFile)) && get_query_type() != QUERY_DOCNUMS)
|
---|
| 1003 | {
|
---|
| 1004 | ConvertTermsToString(qd->TL, terms_str);
|
---|
| 1005 | fprintf(Output, "%s\n", terms_str);
|
---|
| 1006 | }
|
---|
| 1007 | #endif
|
---|
| 1008 | if (pager)
|
---|
| 1009 | {
|
---|
| 1010 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1011 | #ifdef __WIN32__
|
---|
| 1012 | Output = _popen (pager, "w");
|
---|
| 1013 | #else
|
---|
| 1014 | Output = popen (pager, "w");
|
---|
| 1015 | #endif
|
---|
| 1016 | using_pipe = (Output != NULL);
|
---|
| 1017 | if (!using_pipe)
|
---|
| 1018 | {
|
---|
| 1019 | fprintf (stderr, "Unable to run \"%s\"\n", pager);
|
---|
| 1020 | return;
|
---|
| 1021 | }
|
---|
| 1022 | }
|
---|
| 1023 |
|
---|
| 1024 |
|
---|
| 1025 | if (qd->DL->num > 0)
|
---|
| 1026 | {
|
---|
| 1027 | if (OutputType == OUTPUT_COUNT && !post_proc)
|
---|
| 1028 | DocCount = qd->DL->num;
|
---|
| 1029 | else {
|
---|
| 1030 | DocCount = ProcessDocs (qd, qd->DL->num,
|
---|
| 1031 | BooleanEnv (GetEnv ("verbatim"), 1),
|
---|
| 1032 | OutputType, Output);
|
---|
| 1033 | }
|
---|
| 1034 | }
|
---|
| 1035 |
|
---|
| 1036 | if (PagerRunning)
|
---|
| 1037 | {
|
---|
| 1038 | output_terminator (Output);
|
---|
| 1039 | fflush (Output);
|
---|
| 1040 | }
|
---|
| 1041 |
|
---|
| 1042 | if (OutputType == OUTPUT_HILITE && pager)
|
---|
| 1043 | free (pager); /* as needed to malloc to create the pager string */
|
---|
| 1044 |
|
---|
| 1045 | if (using_pipe)
|
---|
| 1046 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1047 | #ifdef __WIN32__
|
---|
| 1048 | _pclose (Output);
|
---|
| 1049 | #else
|
---|
| 1050 | pclose (Output);
|
---|
| 1051 | #endif
|
---|
| 1052 |
|
---|
| 1053 | if (qd->DL->num == 0)
|
---|
| 1054 | fprintf (stderr, "No entries correspond to that query.\n");
|
---|
| 1055 | else
|
---|
| 1056 | {
|
---|
| 1057 | if (OutputType == OUTPUT_COUNT)
|
---|
| 1058 | fprintf (stderr, "%d documents match.\n", DocCount);
|
---|
| 1059 | else
|
---|
| 1060 | fprintf (stderr, "%d documents retrieved.\n", DocCount);
|
---|
| 1061 | }
|
---|
| 1062 |
|
---|
| 1063 | signal (SIGINT, SIG_DFL);
|
---|
| 1064 | }
|
---|
| 1065 |
|
---|
| 1066 |
|
---|
| 1067 | void
|
---|
| 1068 | start_up_stats (query_data * qd, InitQueryTimes iqt)
|
---|
| 1069 | {
|
---|
| 1070 | Clear_Stats ();
|
---|
| 1071 | if (BooleanEnv (GetEnv ("timestats"), 0) ||
|
---|
| 1072 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1073 | StartUpTimeStats (&iqt);
|
---|
| 1074 |
|
---|
| 1075 | if (BooleanEnv (GetEnv ("diskstats"), 0) ||
|
---|
| 1076 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1077 | File_Stats (qd);
|
---|
| 1078 |
|
---|
| 1079 | if (BooleanEnv (GetEnv ("memstats"), 0) ||
|
---|
| 1080 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1081 | MemStats (qd);
|
---|
| 1082 |
|
---|
| 1083 | }
|
---|
| 1084 |
|
---|
| 1085 |
|
---|
| 1086 | void
|
---|
| 1087 | shut_down_stats (query_data * qd, ProgTime * start,
|
---|
| 1088 | ProgTime * invf, ProgTime * text)
|
---|
| 1089 | {
|
---|
| 1090 | Clear_Stats ();
|
---|
| 1091 | if (BooleanEnv (GetEnv ("timestats"), 0) ||
|
---|
| 1092 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1093 | QueryTimeStats (start, invf, text);
|
---|
| 1094 |
|
---|
| 1095 | if (BooleanEnv (GetEnv ("diskstats"), 0) ||
|
---|
| 1096 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1097 | {
|
---|
| 1098 | TransFileStats (qd);
|
---|
| 1099 | File_Stats (qd);
|
---|
| 1100 | }
|
---|
| 1101 |
|
---|
| 1102 | if (BooleanEnv (GetEnv ("sizestats"), 0))
|
---|
| 1103 | TotalSizeStats (qd);
|
---|
| 1104 | }
|
---|
| 1105 |
|
---|
| 1106 |
|
---|
| 1107 |
|
---|
/* wordfreqword2str () */
/* Converts a length-prefixed word (first byte = number of characters, */
/* followed by the characters) into a NUL-terminated C string. */
/* The result lives in a static buffer that is overwritten by the next */
/* call, so it must be used or copied before calling again. */
char *wordfreqword2str (u_char * s)
{
  static char buf[1024];
  size_t count = (size_t) s[0];	/* a single byte, so at most 255 */

  memcpy (buf, s + 1, count);
  buf[count] = '\0';

  return buf;
}
| 1121 |
|
---|
| 1122 |
|
---|
| 1123 | /* [RPAP - Feb 97: Term Frequency] */
|
---|
| 1124 | /*********************************
|
---|
| 1125 | * PrintQueryTermFreq
|
---|
| 1126 | *
|
---|
| 1127 | * Prints the query terms and their respective frequencies within the collection
|
---|
| 1128 | *********************************/
|
---|
| 1129 | void
|
---|
| 1130 | PrintQueryTermFreqs (QueryTermList *qtl)
|
---|
| 1131 | {
|
---|
| 1132 | int i;
|
---|
| 1133 |
|
---|
| 1134 | /* Print the number of terms */
|
---|
| 1135 | fprintf (OutFile, "%d\n", qtl->num);
|
---|
| 1136 |
|
---|
| 1137 | /* Print the terms and their respective frequency within the collection */
|
---|
| 1138 | for (i = 0; i < qtl->num; i++)
|
---|
| 1139 | if (qtl->QTE[i].stem_method == -1)
|
---|
| 1140 | /* Using default stem method - don't print stem method beside term */
|
---|
| 1141 | fprintf (OutFile, "%s %d\n", wordfreqword2str (qtl->QTE[i].Term), qtl->QTE[i].Count);
|
---|
| 1142 | else
|
---|
| 1143 | /* Term was forced with a stem, print stem method with term */
|
---|
| 1144 | fprintf (OutFile, "%s#%d %d\n", wordfreqword2str (qtl->QTE[i].Term), qtl->QTE[i].stem_method, qtl->QTE[i].Count);
|
---|
| 1145 | }
|
---|
| 1146 |
|
---|
| 1147 |
|
---|
| 1148 | void
|
---|
| 1149 | query (void)
|
---|
| 1150 | {
|
---|
| 1151 | ProgTime TotalStartTime, TotalInvfTime, TotalTextTime;
|
---|
| 1152 | InitQueryTimes iqt;
|
---|
| 1153 | query_data *qd;
|
---|
| 1154 |
|
---|
| 1155 | TotalStartTime.RealTime = TotalStartTime.CPUTime = 0;
|
---|
| 1156 | TotalInvfTime.RealTime = TotalInvfTime.CPUTime = 0;
|
---|
| 1157 | TotalTextTime.RealTime = TotalTextTime.CPUTime = 0;
|
---|
| 1158 |
|
---|
| 1159 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1160 | #ifdef __WIN32__
|
---|
| 1161 | qd = InitQuerySystem (GetDefEnv ("mgdir", ".\\"),
|
---|
| 1162 | GetDefEnv ("mgname", ""),
|
---|
| 1163 | GetDefEnv ("textname", NULL), /* [RJM 06/97: text filename] */
|
---|
| 1164 | &iqt);
|
---|
| 1165 | #else
|
---|
| 1166 | qd = InitQuerySystem (GetDefEnv ("mgdir", "./"),
|
---|
| 1167 | GetDefEnv ("mgname", ""),
|
---|
| 1168 | GetDefEnv ("textname", NULL), /* [RJM 06/97: text filename] */
|
---|
| 1169 | &iqt);
|
---|
| 1170 | #endif
|
---|
| 1171 |
|
---|
| 1172 | if (!qd)
|
---|
| 1173 | FatalError (1, mg_errorstrs[mg_errno], mg_error_data);
|
---|
| 1174 | start_up_stats (qd, iqt);
|
---|
| 1175 |
|
---|
| 1176 |
|
---|
| 1177 | while (1)
|
---|
| 1178 | {
|
---|
| 1179 | ProgTime StartTime, InvfTime, TextTime;
|
---|
| 1180 | char QueryType;
|
---|
| 1181 | char OutputType;
|
---|
| 1182 | char *line;
|
---|
| 1183 | ResetFileStats (qd);
|
---|
| 1184 | qd->max_mem_in_use = qd->mem_in_use = 0;
|
---|
| 1185 |
|
---|
| 1186 | qd->tot_hops_taken += qd->hops_taken;
|
---|
| 1187 | qd->tot_num_of_ptrs += qd->num_of_ptrs;
|
---|
| 1188 | qd->tot_num_of_accum += qd->num_of_accum;
|
---|
| 1189 | qd->tot_num_of_terms += qd->num_of_terms;
|
---|
| 1190 | qd->tot_num_of_ans += qd->num_of_ans;
|
---|
| 1191 | qd->tot_text_idx_lookups += qd->text_idx_lookups;
|
---|
| 1192 | qd->hops_taken = qd->num_of_ptrs = 0;
|
---|
| 1193 | qd->num_of_accum = qd->num_of_ans = qd->num_of_terms = 0;
|
---|
| 1194 | qd->text_idx_lookups = 0;
|
---|
| 1195 |
|
---|
| 1196 | Display_Stats (stderr);
|
---|
| 1197 | Clear_Stats ();
|
---|
| 1198 | line = get_query (qd);
|
---|
| 1199 | if (!line || Quitting)
|
---|
| 1200 | break;
|
---|
| 1201 |
|
---|
| 1202 | GetPostProc (line);
|
---|
| 1203 |
|
---|
| 1204 | GetTime (&StartTime);
|
---|
| 1205 |
|
---|
| 1206 | FreeQueryDocs (qd);
|
---|
| 1207 |
|
---|
| 1208 | QueryType = get_query_type ();
|
---|
| 1209 | OutputType = get_output_type ();
|
---|
| 1210 | /* No point in hiliting words on a docnum query */
|
---|
| 1211 | if (OutputType == OUTPUT_HILITE && QueryType == QUERY_DOCNUMS)
|
---|
| 1212 | OutputType = OUTPUT_TEXT;
|
---|
| 1213 |
|
---|
| 1214 | switch (QueryType)
|
---|
| 1215 | {
|
---|
| 1216 | case QUERY_BOOLEAN:
|
---|
| 1217 | {
|
---|
| 1218 | char *maxdocs;
|
---|
| 1219 | BooleanQueryInfo bqi;
|
---|
| 1220 | maxdocs = GetDefEnv ("maxdocs", "all");
|
---|
| 1221 | bqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
|
---|
| 1222 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 1223 | if (qd->sd->sdh.indexed)
|
---|
| 1224 | BooleanQuery (qd, line, &bqi, (BooleanEnv (GetEnv ("casefold"), 0) |
|
---|
| 1225 | (BooleanEnv (GetEnv ("stem"), 0) << 1)));
|
---|
| 1226 | else
|
---|
| 1227 | BooleanQuery (qd, line, &bqi, qd->sd->sdh.stem_method);
|
---|
| 1228 |
|
---|
| 1229 | break;
|
---|
| 1230 | }
|
---|
| 1231 | case QUERY_APPROX:
|
---|
| 1232 | case QUERY_RANKED:
|
---|
| 1233 | {
|
---|
| 1234 | char *maxdocs;
|
---|
| 1235 | char *maxterms;
|
---|
| 1236 | char *maxaccum;
|
---|
| 1237 | RankedQueryInfo rqi;
|
---|
| 1238 | maxdocs = GetDefEnv ("maxdocs", "all");
|
---|
| 1239 | maxterms = GetDefEnv ("max_terms", "all");
|
---|
| 1240 | maxaccum = GetDefEnv ("max_accumulators", "all");
|
---|
| 1241 | rqi.Sort = BooleanEnv (GetEnv ("sorted_terms"), 0);
|
---|
| 1242 | rqi.QueryFreqs = BooleanEnv (GetEnv ("qfreq"), 1);
|
---|
| 1243 | rqi.Exact = QueryType == QUERY_RANKED;
|
---|
| 1244 | rqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
|
---|
| 1245 | rqi.MaxTerms = strcmp (maxterms, "all") ? atoi (maxterms) : -1;
|
---|
| 1246 | rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve;
|
---|
| 1247 | if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas"))
|
---|
| 1248 | rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
|
---|
| 1249 | rqi.AccumMethod = toupper (*GetDefEnv ("accumulator_method", "A"));
|
---|
| 1250 | rqi.MaxAccums = strcmp (maxaccum, "all") ? atoi (maxaccum) : -1;
|
---|
| 1251 | rqi.HashTblSize = IntEnv (GetEnv ("hash_tbl_size"), 1000);
|
---|
| 1252 | rqi.StopAtMaxAccum = BooleanEnv (GetEnv ("stop_at_max_accum"), 0);
|
---|
| 1253 | rqi.skip_dump = GetEnv ("skip_dump");
|
---|
| 1254 | RankedQuery (qd, line, &rqi);
|
---|
| 1255 | break;
|
---|
| 1256 | }
|
---|
| 1257 | case QUERY_DOCNUMS:
|
---|
| 1258 | {
|
---|
| 1259 | DocnumsQuery (qd, line);
|
---|
| 1260 | break;
|
---|
| 1261 | }
|
---|
| 1262 | }
|
---|
| 1263 |
|
---|
| 1264 | GetTime (&InvfTime);
|
---|
| 1265 |
|
---|
| 1266 | /* [RPAP - Feb 97: Term Frequency] */
|
---|
| 1267 | if (qd->QTL && BooleanEnv (GetEnv ("term_freq"), 0))
|
---|
| 1268 | PrintQueryTermFreqs (qd->QTL);
|
---|
| 1269 |
|
---|
| 1270 | if (qd->DL)
|
---|
| 1271 | MoreDocs (qd, line, OutputType);
|
---|
| 1272 |
|
---|
| 1273 | GetTime (&TextTime);
|
---|
| 1274 |
|
---|
| 1275 | if (BooleanEnv (GetEnv ("timestats"), 0) ||
|
---|
| 1276 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1277 | QueryTimeStats (&StartTime, &InvfTime, &TextTime);
|
---|
| 1278 |
|
---|
| 1279 | if (BooleanEnv (GetEnv ("diskstats"), 0) ||
|
---|
| 1280 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1281 | File_Stats (qd);
|
---|
| 1282 |
|
---|
| 1283 | if (BooleanEnv (GetEnv ("memstats"), 0) ||
|
---|
| 1284 | BooleanEnv (GetEnv ("briefstats"), 0))
|
---|
| 1285 | MemStats (qd);
|
---|
| 1286 |
|
---|
| 1287 | if (BooleanEnv (GetEnv ("sizestats"), 0))
|
---|
| 1288 | SizeStats (qd);
|
---|
| 1289 |
|
---|
| 1290 | TotalInvfTime.RealTime += InvfTime.RealTime - StartTime.RealTime;
|
---|
| 1291 | TotalInvfTime.CPUTime += InvfTime.CPUTime - StartTime.CPUTime;
|
---|
| 1292 | TotalTextTime.RealTime += TextTime.RealTime - StartTime.RealTime;
|
---|
| 1293 | TotalTextTime.CPUTime += TextTime.CPUTime - StartTime.CPUTime;
|
---|
| 1294 | }
|
---|
| 1295 |
|
---|
| 1296 | if (isatty (fileno (InFile)) && !Quitting)
|
---|
| 1297 | fprintf (stderr, "\n");
|
---|
| 1298 |
|
---|
| 1299 | shut_down_stats (qd, &TotalStartTime, &TotalInvfTime, &TotalTextTime);
|
---|
| 1300 |
|
---|
| 1301 | Display_Stats (stderr);
|
---|
| 1302 |
|
---|
| 1303 | }
|
---|
| 1304 |
|
---|
| 1305 |
|
---|
| 1306 | void
|
---|
| 1307 | search_for_collection (char *name)
|
---|
| 1308 | {
|
---|
| 1309 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1310 | #ifdef __WIN32__
|
---|
| 1311 | char *dir = GetDefEnv ("mgdir", ".\\");
|
---|
| 1312 | #else
|
---|
| 1313 | char *dir = GetDefEnv ("mgdir", "./");
|
---|
| 1314 | #endif
|
---|
| 1315 | char buffer[512];
|
---|
| 1316 | struct stat stat_buf;
|
---|
| 1317 | if (strrchr (dir, '/') && *(strrchr (dir, '/') + 1) != '\0')
|
---|
| 1318 | {
|
---|
| 1319 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1320 | #ifdef __WIN32__
|
---|
| 1321 | sprintf (buffer, "%s", dir);
|
---|
| 1322 | #else
|
---|
| 1323 | sprintf (buffer, "%s/", dir);
|
---|
| 1324 | #endif
|
---|
| 1325 | SetEnv ("mgdir", buffer, NULL);
|
---|
| 1326 | dir = GetEnv ("mgdir");
|
---|
| 1327 | }
|
---|
| 1328 |
|
---|
| 1329 | sprintf (buffer, "%s.text", name);
|
---|
| 1330 | if (stat (buffer, &stat_buf) != -1)
|
---|
| 1331 | {
|
---|
| 1332 | if ((stat_buf.st_mode & S_IFREG) != 0)
|
---|
| 1333 | {
|
---|
| 1334 | /* The name is a directory */
|
---|
| 1335 | SetEnv ("mgname", name, NULL);
|
---|
| 1336 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1337 | #ifdef __WIN32__
|
---|
| 1338 | SetEnv ("mgdir", ".\\", NULL);
|
---|
| 1339 | #else
|
---|
| 1340 | SetEnv ("mgdir", "./", NULL);
|
---|
| 1341 | #endif
|
---|
| 1342 | return;
|
---|
| 1343 | }
|
---|
| 1344 | }
|
---|
| 1345 |
|
---|
| 1346 | sprintf (buffer, "%s%s", dir, name);
|
---|
| 1347 | if (stat (buffer, &stat_buf) != -1)
|
---|
| 1348 | {
|
---|
| 1349 | if ((stat_buf.st_mode & S_IFDIR) != 0)
|
---|
| 1350 | {
|
---|
| 1351 | /* The name is a directory */
|
---|
| 1352 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1353 | #ifdef __WIN32__
|
---|
| 1354 | sprintf (buffer, "%s%s", name, name);
|
---|
| 1355 | #else
|
---|
| 1356 | sprintf (buffer, "%s/%s", name, name);
|
---|
| 1357 | #endif
|
---|
| 1358 | SetEnv ("mgname", buffer, NULL);
|
---|
| 1359 | return;
|
---|
| 1360 | }
|
---|
| 1361 | }
|
---|
| 1362 |
|
---|
| 1363 | /* Look in the current directory last */
|
---|
| 1364 | if (stat (name, &stat_buf) != -1)
|
---|
| 1365 | {
|
---|
| 1366 | if ((stat_buf.st_mode & S_IFDIR) != 0)
|
---|
| 1367 | {
|
---|
| 1368 | /* The name is a directory */
|
---|
| 1369 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1370 | #ifdef __WIN32__
|
---|
| 1371 | sprintf (buffer, "%s%s", name, name);
|
---|
| 1372 | SetEnv ("mgdir", ".\\", NULL);
|
---|
| 1373 | #else
|
---|
| 1374 | sprintf (buffer, "%s/%s", name, name);
|
---|
| 1375 | SetEnv ("mgdir", "./", NULL);
|
---|
| 1376 | #endif
|
---|
| 1377 | SetEnv ("mgname", buffer, NULL);
|
---|
| 1378 | return;
|
---|
| 1379 | }
|
---|
| 1380 | }
|
---|
| 1381 |
|
---|
| 1382 | SetEnv ("mgname", name, NULL);
|
---|
| 1383 | }
|
---|
| 1384 |
|
---|
| 1385 | /* main () */
|
---|
| 1386 | /* Initialises global variables based on command line switches, and opens */
|
---|
| 1387 | /* files. Then calls query () to perform the querying. */
|
---|
| 1388 | int main (int argc, char **argv)
|
---|
| 1389 | {
|
---|
| 1390 | ProgTime StartTime;
|
---|
| 1391 | int decomp = 0;
|
---|
| 1392 | int ch;
|
---|
| 1393 |
|
---|
| 1394 | msg_prefix = argv[0];
|
---|
| 1395 | GetTime (&StartTime);
|
---|
| 1396 |
|
---|
| 1397 | /* Initialise the environment with default values */
|
---|
| 1398 |
|
---|
| 1399 | InitEnv ();
|
---|
| 1400 |
|
---|
| 1401 | read_mgrc_file ();
|
---|
| 1402 |
|
---|
| 1403 | OutFile = stdout;
|
---|
| 1404 | InFile = stdin;
|
---|
| 1405 |
|
---|
| 1406 | opterr = 0;
|
---|
| 1407 | /* [RJM 06/97: text filename] */
|
---|
| 1408 | while ((ch = getopt (argc, argv, "Df:d:t:h")) != -1) {
|
---|
| 1409 | switch (ch) {
|
---|
| 1410 | case 'f':
|
---|
| 1411 | SetEnv ("mgname", optarg, NULL);
|
---|
| 1412 | break;
|
---|
| 1413 | case 'd':
|
---|
| 1414 | SetEnv ("mgdir", optarg, NULL);
|
---|
| 1415 | break;
|
---|
| 1416 | case 't': /* [RJM 06/97: text filename] */
|
---|
| 1417 | SetEnv ("textname", optarg, NULL);
|
---|
| 1418 | break;
|
---|
| 1419 | case 'D':
|
---|
| 1420 | decomp = 1;
|
---|
| 1421 | break;
|
---|
| 1422 | case 'h':
|
---|
| 1423 | case '?':
|
---|
| 1424 | fprintf (stderr, "usage: %s [-D] [-f base name of collection] "
|
---|
| 1425 | "[-t base name of files for text] " /* [RJM 06/97: text filename] */
|
---|
| 1426 | "[-d data directory] [collection]\n", argv[0]);
|
---|
| 1427 | exit (1);
|
---|
| 1428 | }
|
---|
| 1429 | }
|
---|
| 1430 |
|
---|
| 1431 | PushEnv ();
|
---|
| 1432 |
|
---|
| 1433 | if (decomp == 0)
|
---|
| 1434 | {
|
---|
| 1435 |
|
---|
| 1436 | Init_ReadLine ();
|
---|
| 1437 |
|
---|
| 1438 | /* write a first prompt, let the user start thinking */
|
---|
| 1439 | if (!BooleanEnv (GetEnv ("expert"), 0) && isatty (fileno (InFile)))
|
---|
| 1440 | {
|
---|
| 1441 | fprintf (stderr, "\n\n\t FULL TEXT RETRIEVAL QUERY PROGRAM\n");
|
---|
| 1442 | fprintf (stderr, "%24s%s\n\n", "", *"21 Mar 1994" == '%' ? __DATE__ : "21 Mar 1994");
|
---|
| 1443 | fprintf (stderr, "\n");
|
---|
| 1444 | fprintf (stderr, " mgquery version " VERSION ", Copyright (C) 1994 Neil Sharman\n");
|
---|
| 1445 | fprintf (stderr, " mgquery comes with ABSOLUTELY NO WARRANTY; for details type `.warranty'\n");
|
---|
| 1446 | fprintf (stderr, " This is free software, and you are welcome to redistribute it\n");
|
---|
| 1447 | fprintf (stderr, " under certain conditions; type `.conditions' for details.\n");
|
---|
| 1448 | fprintf (stderr, "\n");
|
---|
| 1449 | }
|
---|
| 1450 | }
|
---|
| 1451 | if (optind < argc)
|
---|
| 1452 | search_for_collection (argv[optind]);
|
---|
| 1453 |
|
---|
| 1454 | if (decomp == 0)
|
---|
| 1455 | {
|
---|
| 1456 | query ();
|
---|
| 1457 | }
|
---|
| 1458 | else
|
---|
| 1459 | {
|
---|
| 1460 | int i;
|
---|
| 1461 | InitQueryTimes iqt;
|
---|
| 1462 | query_data *qd;
|
---|
| 1463 |
|
---|
| 1464 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 1465 | #ifdef __WIN32__
|
---|
| 1466 | qd = InitQuerySystem (GetDefEnv ("mgdir", ".\\"),
|
---|
| 1467 | GetDefEnv ("mgname", ""),
|
---|
| 1468 | GetDefEnv ("textname", NULL), /* [RJM 06/97: text filename] */
|
---|
| 1469 | &iqt);
|
---|
| 1470 | #else
|
---|
| 1471 | qd = InitQuerySystem (GetDefEnv ("mgdir", "./"),
|
---|
| 1472 | GetDefEnv ("mgname", ""),
|
---|
| 1473 | GetDefEnv ("textname", NULL), /* [RJM 06/97: text filename] */
|
---|
| 1474 | &iqt);
|
---|
| 1475 | #endif
|
---|
| 1476 | if (!qd)
|
---|
| 1477 | FatalError (1, mg_errorstrs[mg_errno], mg_error_data);
|
---|
| 1478 |
|
---|
| 1479 |
|
---|
| 1480 | start_up_stats (qd, iqt);
|
---|
| 1481 |
|
---|
| 1482 | Display_Stats (stderr);
|
---|
| 1483 | for (i = 0; i < qd->td->cth.num_of_docs; i++)
|
---|
| 1484 | {
|
---|
| 1485 | RawDocOutput (qd, i + 1, stdout);
|
---|
| 1486 | putc ('\2', stdout);
|
---|
| 1487 | }
|
---|
| 1488 | Message ("%s", ElapsedTime (&StartTime, NULL));
|
---|
| 1489 |
|
---|
| 1490 | FinishQuerySystem (qd);
|
---|
| 1491 | }
|
---|
| 1492 |
|
---|
| 1493 | UninitEnv ();
|
---|
| 1494 | return 0;
|
---|
| 1495 | }
|
---|