source: trunk/gsdl/src/colservr/mgq.c@ 301

Last change on this file since 301 was 115, checked in by rjmcnab, 25 years ago

Made the source more portable.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1/**********************************************************************
2 *
3 * mgq.c -- cut-down version of mgquery
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: mgq.c 115 1999-01-19 01:38:20Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.3 1999/01/19 01:38:16 rjmcnab
15 Made the source more portable.
16
17 Revision 1.2 1999/01/12 01:51:02 rjmcnab
18
19 Standard header.
20
21 Revision 1.1 1999/01/08 09:02:22 rjmcnab
22
23 Moved from src/library.
24
25 */
26
27
28#include "mgq.h"
29
30
31#include <stdio.h>
32#include <string.h>
33/* #include <io.h> */
34#include <fcntl.h>
35
36#ifdef __cplusplus
37extern "C" {
38#endif
39
40#include "sysfuncs.h"
41
42#include "messages.h"
43#include "memlib.h"
44
45#include "invf.h"
46#include "text.h"
47#include "lists.h"
48#include "backend.h"
49#include "environment.h"
50#include "globals.h"
51#include "mg_errors.h"
52#include "commands.h"
53#include "text_get.h"
54#include "term_lists.h"
55#include "local_strings.h"
56
57#ifdef __cplusplus
58}
59#endif
60
61
62#include "mgq.h"
63
/* Number of databases the cache can hold at once.  A build may define
   MAXNUMDATABASEINFO itself; otherwise builds with GSDLSERVER defined
   get 10 slots and all other builds get 2. */
#if !defined(MAXNUMDATABASEINFO)
#  if defined(GSDLSERVER)
#    define MAXNUMDATABASEINFO 10
#  else
#    define MAXNUMDATABASEINFO 2
#  endif
#endif

/* Sizes, in chars, of the fixed string buffers stored in each
   DatabaseInfo cache entry. */
#define MAXCOLLECTIONLEN 16
#define MAXMGDIRLEN      256
#define MAXGENSUFFIXLEN  256
#define MAXTEXTSUFFIXLEN 256
77
78typedef struct DatabaseInfo {
79 int accessnum; /* -1 = invalid record */
80 char collection[MAXCOLLECTIONLEN];
81 char mgdir[MAXMGDIRLEN];
82 char gensuffix[MAXGENSUFFIXLEN];
83 char textsuffix[MAXTEXTSUFFIXLEN];
84 query_data *qd;
85} DatabaseInfo;
86
87
88/* globals needed by some vague part of mg... */
89FILE *OutFile = NULL, *InFile = NULL;
90int OutPipe = 0, InPipe = 0;
91int Quitting = 0;
92
93/* globals needed to handle loading of databases */
94static int cur_cachenum = -1;
95
96/* globals needed by the database cache */
97static DatabaseInfo dbcache[MAXNUMDATABASEINFO];
98static int cache_nextaccessnum = 0;
99static int cache_numloaded = 0;
100
101
102
103#if defined(PARADOCNUM) || defined(NZDL)
104static int GetDocNumFromParaNum(query_data *qd, int paranum) {
105 int Documents = qd->td->cth.num_of_docs;
106 int *Paragraph = qd->paragraph;
107 int low = 1, high = Documents;
108 int mid = (low+high)/2;
109
110 while ((mid = (low+high)/2) >=1 && mid <= Documents)
111 {
112 if (paranum > Paragraph[mid])
113 low = mid+1;
114 else if (paranum <= Paragraph[mid-1])
115 high = mid-1;
116 else
117 return mid;
118 }
119 FatalError(1, "Bad paragraph number.\n");
120 return 0;
121}
122#endif
123
124
125/*****************************************************************************/
126
/* MGQError reports an unrecoverable error.  The message is written   */
/* to stderr after the text "Fatal error: " and the process is then   */
/* terminated with exit status 1; control never returns to the caller. */
static void MGQError(char *emsg)
{
  (void) fprintf(stderr, "Fatal error: %s\n", emsg);
  exit(1);
}
132
133static int ProcessDocs (query_data * qd, int skip, int howmany,
134 enum result_kinds kind,
135 int (*sender)(char *,int,int,float,void *), void *ptr)
136{
137 int max_buf = 0, output_failure = 0;
138 int DocCount = 0;
139 int need_text = (kind == result_docs);
140
141 for (;;) {
142 if (skip <= 0) break;
143 skip--;
144 if (!NextDoc(qd)) return 0;
145 }
146
147 if (need_text)
148 {
149 max_buf = atoi (GetDefEnv ("buffer", "1048576"));
150 }
151
152 do
153 {
154 u_char *UDoc = NULL;
155 unsigned long ULen=0;
156
157 if (need_text)
158 {
159 /* load the compressed text */
160 if (LoadCompressedText (qd, max_buf))
161 {
162 MGQError("Unable to load compressed text(memory?).");
163 }
164 /* uncompress the loaded text */
165 UDoc = GetDocText (qd, &ULen);
166 if (UDoc == NULL)
167 MGQError("UDoc is unexpectedly NULL");
168 }
169
170 if (UDoc != NULL || kind == result_docnums)
171 {
172 int docnum = GetDocNum(qd);
173#if defined(PARADOCNUM) || defined(NZDL)
174 if (qd->id->ifh.InvfLevel == 3) docnum = GetDocNumFromParaNum(qd, docnum);
175#endif
176 switch (kind)
177 {
178 case result_docnums:
179 if (sender != NULL)
180 output_failure = (*sender)("",0,docnum,GetDocWeight(qd),ptr);
181 break;
182 case result_docs:
183 if (sender != NULL)
184 output_failure = (*sender)((char *)UDoc,ULen,docnum,GetDocWeight(qd),ptr);
185 break;
186 default:
187 break;
188 }
189 }
190 DocCount++;
191
192 }
193 while (NextDoc (qd) && output_failure == 0 && --howmany > 0);
194
195 if (need_text)
196 {
197 FreeTextBuffer (qd);
198 }
199
200 return (DocCount);
201}
202
203
204static void send_query_term_freqs(QueryTermList *qtl,
205 int (*sender)(char *,int,int,float,void *), void *ptr)
206{
207 int i = 0;
208 for (i = 0; i < qtl->num; i++)
209 if (sender != NULL) {
210 /* word = word2str(qtl->QTE[i].Term);
211 (* sender)(word, strlen(word), qtl->QTE[i].Count, (float)0.0, ptr); */
212 (* sender)((char *)(qtl->QTE[i].Term+1), qtl->QTE[i].Term[0],
213 qtl->QTE[i].Count, (float)0.0, ptr);
214 }
215}
216
217
218static void send_terms (TermList * qtl,
219 int (*sender)(char *,int,int,float,void *), void *ptr)
220{
221 int i = 0;
222 if (sender == NULL) return;
223 for (i = 0; i < qtl->num; i++)
224 {
225 /* word = word2str(qtl->TE[i].Word);
226 (* sender)(word, strlen(word), qtl->TE[i].Count, (float)0.0, ptr);*/
227 (* sender)((char *)(qtl->TE[i].Word+1), qtl->TE[i].Word[0],
228 qtl->TE[i].Count, (float)0.0, ptr);
229 }
230}
231
232
233/* MoreDocs () */
234/* Displays all documents in list DocList. */
235/* Documents are fetched, then decompressed and displayed according to the */
236/* format implied in FormString(). */
237
238static void
239MoreDocs (query_data * qd, enum result_kinds kind,
240 int skip, int howmany,
241 int (*sender)(char *,int,int,float,void *), void *ptr)
242{
243 qd->num_of_ans = qd->DL->num;
244 switch (kind) {
245 case result_docs:
246 case result_docnums:
247 if (qd->num_of_ans > 0)
248 ProcessDocs (qd, skip, howmany, kind, sender, ptr);
249 break;
250 case result_termfreqs:
251 send_query_term_freqs(qd->QTL, sender, ptr);
252 break;
253 case result_terms:
254 send_terms(qd->TL, sender, ptr);
255 break;
256 }
257}
258
259
260
261
262
263
264/******************************************
265 * functions to handle the database cache *
266 ******************************************/
267
268/* init_dbcache should be called at the start of each */
269/* function which deals with the database cache */
270static void init_dbcache (void) {
271 static int dbcacheinited = 0;
272 int i = 0;
273
274 if (dbcacheinited) return;
275
276 cache_numloaded = 0;
277
278 for (i=0; i<MAXNUMDATABASEINFO; i++) {
279 dbcache[i].accessnum = -1;
280 dbcache[i].collection[0] = '\0';
281 dbcache[i].mgdir[0] = '\0';
282 dbcache[i].gensuffix[0] = '\0';
283 dbcache[i].textsuffix[0] = '\0';
284 dbcache[i].qd = NULL;
285 }
286
287 dbcacheinited = 1;
288}
289
290/* returns the next cache access number and increments it */
291static int get_next_accessnum (void) {
292 return cache_nextaccessnum++;
293}
294
295/* get_free_dbcache returns the cache number which */
296/* was used the longest time ago */
297/* init_dbcache should be called before this function */
298static int get_free_dbcache (void) {
299 int i = 0;
300 int minaccessnum = cache_nextaccessnum; /* the current max */
301 int minpos = 0;
302
303 for (i=0; i<MAXNUMDATABASEINFO; i++) {
304 if (dbcache[i].accessnum < minaccessnum) {
305 minaccessnum = dbcache[i].accessnum;
306 minpos = i;
307 }
308 }
309
310 return minpos;
311}
312
313/* search_doc_collect will search for an index which */
314/* belongs to a certain collection and which has a document */
315/* level index. It returns -1 if none could be found. */
316/* init_dbcache should be called before this function */
317static int search_doc_collect (char *collection) {
318 int i = 0;
319
320 for (i=0; i<MAXNUMDATABASEINFO; i++) {
321 if ((dbcache[i].accessnum >= 0) &&
322 (dbcache[i].qd != NULL) &&
323 (strcmp (collection, dbcache[i].collection) == 0) &&
324 (dbcache[i].qd->id->ifh.InvfLevel == 2)) {
325 dbcache[i].accessnum = get_next_accessnum ();
326 return i;
327 }
328 }
329
330 return -1;
331}
332
333/* search_doc_collect will search for an index which */
334/* has a certain gensuffix. It returns -1 if none could be found. */
335/* init_dbcache should be called before this function */
336static int search_gensuffix (char *gensuffix) {
337 int i = 0;
338
339 for (i=0; i<MAXNUMDATABASEINFO; i++) {
340 if ((dbcache[i].accessnum >= 0) &&
341 (dbcache[i].qd != NULL) &&
342 (strcmp (gensuffix, dbcache[i].gensuffix) == 0)) {
343 dbcache[i].accessnum = get_next_accessnum ();
344 return i;
345 }
346 }
347
348 return -1;
349}
350
351/* unload_database will unload a certain entry within */
352/* the database cache, clearing it for furture use. */
353static void unload_database (int i) {
354 /* check to see if it contains anything */
355 if (dbcache[i].accessnum < 0 || dbcache[i].qd == NULL)
356 return;
357
358 /* unload all the query information */
359 FinishQuerySystem(dbcache[i].qd);
360
361 /* reset all the db info */
362 dbcache[i].accessnum = -1;
363 dbcache[i].collection[0] = '\0';
364 dbcache[i].mgdir[0] = '\0';
365 dbcache[i].gensuffix[0] = '\0';
366 dbcache[i].textsuffix[0] = '\0';
367 dbcache[i].qd = NULL;
368
369 cache_numloaded--;
370 if (cache_numloaded < 0) cache_numloaded = 0;
371}
372
373/* cache_database will store the information about */
374/* a database in the database cache. */
375static void cache_database (int i, char *collection, char *mgdir, char *gensuffix,
376 char *textsuffix, query_data *qd) {
377 /* make sure this entry has been unloaded first */
378 if (dbcache[i].accessnum >= 0 && dbcache[i].qd != NULL)
379 unload_database (i);
380
381 /* store the db info */
382 dbcache[i].accessnum = get_next_accessnum ();
383 strcpy (dbcache[i].collection, collection);
384 strcpy (dbcache[i].mgdir, mgdir);
385 strcpy (dbcache[i].gensuffix, gensuffix);
386 strcpy (dbcache[i].textsuffix, textsuffix);
387 dbcache[i].qd = qd;
388
389 cache_numloaded++;
390}
391
392static void make_current (int i) {
393 /* see if it is the current index */
394 if (i == cur_cachenum) return;
395
396 /* unload the old index */
397 if (cur_cachenum >= 0) UninitEnv ();
398 cur_cachenum = -1;
399
400 /* make sure the new one is ok */
401 if (i < 0 || dbcache[i].accessnum < 0 || dbcache[i].qd == NULL)
402 return;
403
404 /* load the new one */
405
406 /* Initialise the environment with default values */
407 InitEnv ();
408
409 SetEnv("mgdir",dbcache[i].mgdir,NULL);
410 SetEnv("mgname",dbcache[i].gensuffix,NULL);
411 SetEnv("textname",dbcache[i].textsuffix,NULL);
412
413 PushEnv ();
414
415 cur_cachenum = i;
416}
417
418
419
420/********************
421 * public functions *
422 ********************/
423
424int mgq_ask(char *line)
425{
426 query_data *qd = (query_data *)NULL;
427 char QueryType = QUERY_BOOLEAN;
428 char OutputType = QUERY_DOCNUMS;
429 char *LinePtr = (char *)NULL;
430
431 if (cur_cachenum == -1) return 0;
432 qd = dbcache[cur_cachenum].qd;
433 if (qd == NULL) return 0;
434
435 ResetFileStats (qd);
436 qd->max_mem_in_use = qd->mem_in_use = 0;
437 qd->tot_hops_taken += qd->hops_taken;
438 qd->tot_num_of_ptrs += qd->num_of_ptrs;
439 qd->tot_num_of_accum += qd->num_of_accum;
440 qd->tot_num_of_terms += qd->num_of_terms;
441 qd->tot_num_of_ans += qd->num_of_ans;
442 qd->tot_text_idx_lookups += qd->text_idx_lookups;
443 qd->hops_taken = qd->num_of_ptrs = 0;
444 qd->num_of_accum = qd->num_of_ans = qd->num_of_terms = 0;
445 qd->text_idx_lookups = 0;
446
447 LinePtr = ProcessCommands (line, qd);
448 if (CommandsErrorStr) {
449 fprintf (stderr, "%s\n", CommandsErrorStr);
450 return 0;
451 }
452 if (*LinePtr == '\0') return 1;
453
454 FreeQueryDocs (qd);
455
456 QueryType = get_query_type ();
457 OutputType = get_output_type ();
458 /* No point in hiliting words on a docnum query */
459 if (OutputType == OUTPUT_HILITE && QueryType == QUERY_DOCNUMS)
460 OutputType = OUTPUT_TEXT;
461
462 switch (QueryType)
463 {
464 case QUERY_BOOLEAN:
465 {
466 char *maxdocs = (char *)NULL;
467 BooleanQueryInfo bqi;
468 maxdocs = GetDefEnv ("maxdocs", "all");
469 bqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
470 if (qd->sd->sdh.indexed)
471 BooleanQuery (qd, line, &bqi, (BooleanEnv (GetEnv ("casefold"), 0) |
472 (BooleanEnv (GetEnv ("stem"), 0) << 1)));
473 else
474 BooleanQuery (qd, line, &bqi, qd->sd->sdh.stem_method);
475 /* if (qd->sd->sdh.indexed) BooleanQuery (qd, line, &bqi, 3);
476 else BooleanQuery (qd, line, &bqi, qd->sd->sdh.stem_method); */
477 break;
478 }
479 case QUERY_APPROX:
480 case QUERY_RANKED:
481 {
482 char *maxdocs = (char *)NULL;
483 char *maxterms = (char *)NULL;
484 char *maxaccum = (char *)NULL;
485 RankedQueryInfo rqi;
486 maxdocs = GetDefEnv ("maxdocs", "all");
487 maxterms = GetDefEnv ("max_terms", "all");
488 maxaccum = GetDefEnv ("max_accumulators", "all");
489 rqi.Sort = BooleanEnv (GetEnv ("sorted_terms"), 0);
490 rqi.QueryFreqs = BooleanEnv (GetEnv ("qfreq"), 1);
491 rqi.Exact = QueryType == QUERY_RANKED;
492 rqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
493 rqi.MaxTerms = strcmp (maxterms, "all") ? atoi (maxterms) : -1;
494 rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve;
495 if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas"))
496 rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
497 rqi.AccumMethod = toupper (*GetDefEnv ("accumulator_method", "A"));
498 rqi.MaxAccums = strcmp (maxaccum, "all") ? atoi (maxaccum) : -1;
499 rqi.HashTblSize = IntEnv (GetEnv ("hash_tbl_size"), 1000);
500 rqi.StopAtMaxAccum = BooleanEnv (GetEnv ("stop_at_max_accum"), 0);
501 rqi.skip_dump = GetEnv ("skip_dump");
502 RankedQuery (qd, line, &rqi);
503 break;
504 }
505 case QUERY_DOCNUMS:
506 {
507 DocnumsQuery (qd, line);
508 break;
509 }
510 }
511
512 return 1;
513}
514
515int mgq_numdocs(void)
516{
517 query_data *qd = NULL;
518 if (cur_cachenum == -1) return 0;
519 qd = dbcache[cur_cachenum].qd;
520 if (qd == NULL) return 0;
521
522 if (qd->DL) return qd->DL->num;
523 else return 0;
524}
525
526int mgq_numterms(void)
527{
528 query_data *qd = NULL;
529 if (cur_cachenum == -1) return 0;
530 qd = dbcache[cur_cachenum].qd;
531 if (qd == NULL) return 0;
532
533 if (qd->QTL) return qd->QTL->num;
534 else return 0;
535}
536
537int mgq_results(enum result_kinds kind,int skip,int howmany, int (*sender)(char *, int, int, float, void *), void *ptr)
538{
539 query_data *qd = NULL;
540 if (cur_cachenum == -1) return 0;
541 qd = dbcache[cur_cachenum].qd;
542 if (qd == NULL) return 0;
543
544 if (qd->DL) {
545 qd->doc_pos = 0;
546 MoreDocs(qd, kind, skip, howmany, sender, ptr);
547 }
548 return 0;
549}
550
551
552int is_dbcache_full (void) {
553 init_dbcache ();
554 if (cache_numloaded >= MAXNUMDATABASEINFO) return 1;
555 return 0;
556}
557
558int load_database (char *collection, char *mgdir,
559 char *gensuffix, char *textsuffix) {
560 int i = 0;
561 query_data *qd = NULL;
562 /* FILE *deb = NULL; */
563 init_dbcache ();
564
565 /* print out some debug information */
566/* deb = fopen ("/home/rjmcnab/gsdl/etc/deb.txt", "a");
567 fprintf (deb, "\ncache_nextaccessnum: %i\n", cache_nextaccessnum);
568 fprintf (deb, "cache_numloaded: %i\n", cache_numloaded);
569 fprintf (deb, "cur_cachenum: %i\n", cur_cachenum);
570 fprintf (deb, "MAXNUMDATABASEINFO: %i\n\n", MAXNUMDATABASEINFO);
571 for (i=0; i<MAXNUMDATABASEINFO; i++) {
572 fprintf (deb, "Entry %i\n", i);
573 fprintf (deb, " accessnum: %i\n", dbcache[i].accessnum);
574 fprintf (deb, " collection: %s\n", dbcache[i].collection);
575 fprintf (deb, " mgdir: %s\n", dbcache[i].mgdir);
576 fprintf (deb, " gensuffix: %s\n", dbcache[i].gensuffix);
577 fprintf (deb, " textsuffix: %s\n", dbcache[i].textsuffix);
578 fprintf (deb, " qd: %x\n", (int)(dbcache[i].qd));
579 }
580 fclose (deb); */
581
582 /* search for the index */
583 i = search_gensuffix (gensuffix);
584 if (i > 0) {
585 make_current (i);
586 return 1;
587 }
588
589 /* if there was a current database then the */
590 /* environment needs uninitialising */
591 make_current (-1);
592
593 /* get a free cache number */
594 i = get_free_dbcache ();
595 unload_database (i);
596
597 /* load the index */
598 qd = InitQuerySystem (mgdir, gensuffix, textsuffix, NULL);
599 if (qd == NULL) return 0;
600
601 /* cache this index */
602 cache_database (i, collection, mgdir, gensuffix, textsuffix, qd);
603
604 /* make this index current */
605 make_current (i);
606
607 return 1;
608}
609
/* load_text_database tries to make a cached level 2 index of the     */
/* specified collection current.  Returns 1 if such an index was      */
/* found in the cache and 0 if not; nothing is loaded from disk.      */
int load_text_database (char *collection) {
  int slot = -1;

  init_dbcache ();

  /* look for a suitable index in the cache */
  slot = search_doc_collect (collection);
  if (slot >= 0) {
    /* found one, so switch to it */
    make_current (slot);
    return 1;
  }

  return 0;
}
626
627void close_all_databases (void) {
628 int i = 0;
629 init_dbcache ();
630
631 /* unload all active databases */
632 for (i=0; i<MAXNUMDATABASEINFO; i++) {
633 unload_database (i);
634 }
635
636 /* if there was a current database then the */
637 /* environment needs uninitialising */
638 make_current (-1);
639}
640
641
642
Note: See TracBrowser for help on using the repository browser.