Changeset 99 for trunk/gsdl/src


Ignore:
Timestamp:
1999-01-08T13:33:50+13:00 (25 years ago)
Author:
rjmcnab
Message:

Enabled mg and the library software to read in more than one index
at a time.

Location:
trunk/gsdl/src/library
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/library/Makefile

    r94 r99  
    2626CC = gcc
    2727CCFLAGS = -g
    28 DEFS = -DNZDL -DQUIET -DSHORT_SUFFIX -DPARADOCNUM -DUSE_FASTCGI
     28DEFS = -DNZDL -DQUIET -DSHORT_SUFFIX -DPARADOCNUM -DUSE_FASTCGI -DGSDLSERVER
    2929RANLIB = ranlib
    3030INCLUDES = -I../../lib -I../../packages/mg-1.3d -I../../packages/mg-1.3d/lib \
  • trunk/gsdl/src/library/libinterface.cpp

    r96 r99  
    268268  } else {
    269269    default_index = real2dirindex (cfg_info.indexmap, cfg_info.defaultindex);
     270  }
     271
     272  // set the text default index (the default index to use when
     273  // retrieving documents).
     274  text_default_index = default_index;
     275  if (!isdoclevelindex (cfg_info.defaultindex)) {
     276    text_default_index = real2dirindex (cfg_info.indexmap,
     277                    getdoclevelindex (cfg_info.indexmap));
    270278  }
    271279
     
    920928    // get document text if there is any
    921929    if (info.contents.empty()) {
    922         search.docTargetDocument(default_index, queryparams.collection,
     930        search.docTargetDocument(text_default_index, queryparams.collection,
    923931                 info.docnum, content);
    924932    if (info.title != "<i>(introductory text)</i>")
     
    966974    }
    967975
    968     search.docTargetDocument(default_index, queryparams.collection,
     976    search.docTargetDocument(text_default_index, queryparams.collection,
    969977                 info.docnum, text);                   
    970978    if (info.title != "<i>(introductory text)</i>")
     
    10151023      // get document text if there is any
    10161024      if (info.contents.empty()) {
    1017         search.docTargetDocument(default_index, queryparams.collection,
     1025        search.docTargetDocument(text_default_index, queryparams.collection,
    10181026                                 info.docnum, content);
    10191027    if (info.title != "<i>(introductory text)</i>")
  • trunk/gsdl/src/library/libinterface.h

    r96 r99  
    9696  int numbooks;
    9797
     98  text_t text_default_index;
    9899  text_t default_index;
    99100
  • trunk/gsdl/src/library/mgq.c

    r94 r99  
    3636#include "mgq.h"
    3737
     38/* get a reasonable database cache size */
     39#ifndef MAXNUMDATABASEINFO
     40#  ifdef GSDLSERVER
     41#    define MAXNUMDATABASEINFO 10
     42#  else
     43#    define MAXNUMDATABASEINFO 2
     44#  endif
     45#endif
     46
     47#define MAXCOLLECTIONLEN  16
     48#define MAXMGDIRLEN       256
     49#define MAXGENSUFFIXLEN   256
     50#define MAXTEXTSUFFIXLEN  256
     51
     52typedef struct DatabaseInfo {
     53  int accessnum; /* -1 = invalid record */
     54  char collection[MAXCOLLECTIONLEN];
     55  char mgdir[MAXMGDIRLEN];
     56  char gensuffix[MAXGENSUFFIXLEN];
     57  char textsuffix[MAXTEXTSUFFIXLEN];
     58  query_data *qd;
     59} DatabaseInfo;
     60
     61
    3862/* globals needed by some vague part of mg... */
    3963FILE *OutFile = NULL, *InFile = NULL;
     
    4165int Quitting = 0;
    4266
    43 static query_data *qd;
    44 char db_loaded_name[256] = "";
     67/* globals needed to handle loading of databases */
     68static int cur_cachenum = -1;
     69
     70/* globals needed by the database cache */
     71static DatabaseInfo dbcache[MAXNUMDATABASEINFO];
     72static int cache_nextaccessnum = 0;
     73static int cache_numloaded = 0;
     74
     75
    4576
    4677#if defined(PARADOCNUM) ||  defined(NZDL)
    47 int *Paragraph = NULL;
    48 int Documents = 0;
    49 
    50 int GetDocNumFromParaNum(int paranum)
    51 {
     78static int GetDocNumFromParaNum(query_data *qd, int paranum) {
     79  int Documents = qd->td->cth.num_of_docs;
     80  int *Paragraph = qd->paragraph;
    5281  int low = 1, high = Documents;
    5382  int mid = (low+high)/2;
     
    117146      int docnum = GetDocNum(qd);
    118147#if defined(PARADOCNUM) ||  defined(NZDL)
    119       if (qd->id->ifh.InvfLevel == 3) docnum = GetDocNumFromParaNum(docnum);
     148      if (qd->id->ifh.InvfLevel == 3) docnum = GetDocNumFromParaNum(qd, docnum);
    120149#endif
    121150      switch (kind)
     
    147176
    148177
    149 void
    150 send_query_term_freqs(QueryTermList *qtl,
    151               int (*sender)(char *,int,int,float,void *), void *ptr)
     178static void send_query_term_freqs(QueryTermList *qtl,
     179                  int (*sender)(char *,int,int,float,void *), void *ptr)
    152180{
    153181  int i;
     
    162190
    163191
    164 void
    165 send_terms (TermList * qtl, int (*sender)(char *,int,int,float,void *), void *ptr)
     192static void send_terms (TermList * qtl,
     193            int (*sender)(char *,int,int,float,void *), void *ptr)
    166194{
    167195  int i = 0;
     
    206234
    207235
    208 int mgq_init(char *mgdir, char *gensuffix, char *textsuffix)
    209 {
     236
     237
     238
     239
     240/******************************************
     241 * functions to handle the database cache *
     242 ******************************************/
     243
     244/* init_dbcache should be called at the start of each */
     245/* function which deals with the database cache */
     246static void init_dbcache () {
     247  static int dbcacheinited = 0;
     248  int i;
     249
     250  if (dbcacheinited) return;
     251
     252  cache_numloaded = 0;
     253
     254  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     255    dbcache[i].accessnum = -1;
     256    dbcache[i].collection[0] = '\0';
     257    dbcache[i].mgdir[0] = '\0';
     258    dbcache[i].gensuffix[0] = '\0';
     259    dbcache[i].textsuffix[0] = '\0';
     260    dbcache[i].qd = NULL;
     261  }
     262
     263  dbcacheinited = 1;
     264}
     265
     266/* returns the next cache access number and increments it */
     267static int get_next_accessnum () {
     268  return cache_nextaccessnum++;
     269}
     270
     271/* get_free_dbcache returns the cache number which */
     272/* was used the longest time ago */
     273/* init_dbcache should be called before this function */
     274static int get_free_dbcache () {
     275  int i;
     276  int minaccessnum = cache_nextaccessnum; /* the current max */
     277  int minpos = 0;
     278
     279  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     280    if (dbcache[i].accessnum < minaccessnum) {
     281      minaccessnum = dbcache[i].accessnum;
     282      minpos = i;
     283    }
     284  }
     285 
     286  return minpos;
     287}
     288
     289/* search_doc_collect will search for an index which */
     290/* belongs to a certain collection and which has a document */
     291/* level index. It returns -1 if none could be found. */
     292/* init_dbcache should be called before this function */
     293static int search_doc_collect (char *collection) {
     294  int i;
     295
     296  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     297    if ((dbcache[i].accessnum >= 0) &&
     298    (dbcache[i].qd != NULL) &&
     299    (strcmp (collection, dbcache[i].collection) == 0) &&
     300    (dbcache[i].qd->id->ifh.InvfLevel == 2)) {
     301      dbcache[i].accessnum = get_next_accessnum ();
     302      return i;
     303    }
     304  }
     305
     306  return -1;
     307}
     308
     309/* search_gensuffix will search for an index which */
     310/* has a certain gensuffix. It returns -1 if none could be found. */
     311/* init_dbcache should be called before this function */
     312static int search_gensuffix (char *gensuffix) {
     313  int i;
     314
     315  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     316    if ((dbcache[i].accessnum >= 0) &&
     317    (dbcache[i].qd != NULL) &&
     318    (strcmp (gensuffix, dbcache[i].gensuffix) == 0)) {
     319      dbcache[i].accessnum = get_next_accessnum ();
     320      return i;
     321    }
     322  }
     323
     324  return -1;
     325}
     326
     327/* unload_database will unload a certain entry within */
     328/* the database cache, clearing it for future use. */
     329static void unload_database (int i) {
     330  /* check to see if it contains anything */
     331  if (dbcache[i].accessnum < 0 || dbcache[i].qd == NULL)
     332    return;
     333
     334  /* unload all the query information  */
     335  FinishQuerySystem(dbcache[i].qd);
     336 
     337  /* reset all the db info */
     338  dbcache[i].accessnum = -1;
     339  dbcache[i].collection[0] = '\0';
     340  dbcache[i].mgdir[0] = '\0';
     341  dbcache[i].gensuffix[0] = '\0';
     342  dbcache[i].textsuffix[0] = '\0';
     343  dbcache[i].qd = NULL;
     344
     345  cache_numloaded--;
     346  if (cache_numloaded < 0) cache_numloaded = 0;
     347}
     348
     349/* cache_database will store the information about */
     350/* a database in the database cache. */
     351static void cache_database (int i, char *collection, char *mgdir, char *gensuffix,
     352                char *textsuffix, query_data *qd) {
     353  /* make sure this entry has been unloaded first */
     354  if (dbcache[i].accessnum >= 0 && dbcache[i].qd != NULL)
     355    unload_database (i);
     356
     357  /* store the db info */
     358  dbcache[i].accessnum = get_next_accessnum ();
     359  strcpy (dbcache[i].collection, collection);
     360  strcpy (dbcache[i].mgdir, mgdir);
     361  strcpy (dbcache[i].gensuffix, gensuffix);
     362  strcpy (dbcache[i].textsuffix, textsuffix);
     363  dbcache[i].qd = qd;
     364
     365  cache_numloaded++;
     366}
     367
     368static void make_current (int i) {
     369  /* see if it is the current index */
     370  if (i == cur_cachenum) return;
     371 
     372  /* unload the old index */
     373  if (cur_cachenum >= 0) UninitEnv ();
     374  cur_cachenum = -1;
     375
     376  /* make sure the new one is ok */
     377  if (i < 0 || dbcache[i].accessnum < 0 || dbcache[i].qd == NULL)
     378    return;
     379
     380  /* load the new one */
     381
    210382  /* Initialise the environment with default values */
    211383  InitEnv ();
    212384 
    213   SetEnv("mgdir",mgdir,NULL);
    214   SetEnv("mgname",gensuffix,NULL);
    215   SetEnv("textname",textsuffix,NULL);
     385  SetEnv("mgdir",dbcache[i].mgdir,NULL);
     386  SetEnv("mgname",dbcache[i].gensuffix,NULL);
     387  SetEnv("textname",dbcache[i].textsuffix,NULL);
    216388 
    217389  PushEnv ();
    218390 
    219   qd = InitQuerySystem (GetDefEnv ("mgdir", ""),
    220             GetDefEnv ("mgname", ""),
    221             GetDefEnv ("textname", NULL),
    222             NULL);
    223  
    224   if (qd == NULL) return 0;  else return 1;
    225 }
     391  cur_cachenum = i;
     392}
     393
     394
     395
     396/********************
     397 * public functions *
     398 ********************/
    226399
    227400int mgq_ask(char *line)
    228401{
     402  query_data *qd = NULL;
    229403  char QueryType;  char OutputType;  char *LinePtr;
     404
     405  if (cur_cachenum == -1) return 0;
     406  qd = dbcache[cur_cachenum].qd;
     407  if (qd == NULL) return 0;
    230408 
    231409  ResetFileStats (qd);
     
    311489int mgq_numdocs(void)
    312490{
     491  query_data *qd = NULL;
     492  if (cur_cachenum == -1) return 0;
     493  qd = dbcache[cur_cachenum].qd;
     494  if (qd == NULL) return 0;
     495
    313496  if (qd->DL) return qd->DL->num;
    314497  else return 0;
     
    317500int mgq_numterms(void)
    318501{
     502  query_data *qd = NULL;
     503  if (cur_cachenum == -1) return 0;
     504  qd = dbcache[cur_cachenum].qd;
     505  if (qd == NULL) return 0;
     506
    319507  if (qd->QTL) return qd->QTL->num;
    320508  else return 0;
     
    323511int mgq_results(enum result_kinds kind,int skip,int howmany, int (*sender)(char *, int, int, float, void *), void *ptr)
    324512{
     513  query_data *qd = NULL;
     514  if (cur_cachenum == -1) return 0;
     515  qd = dbcache[cur_cachenum].qd;
     516  if (qd == NULL) return 0;
     517
    325518  if (qd->DL) {
    326519    qd->doc_pos = 0;
     
    331524
    332525
    333 void mgq_finish(void)
    334 {
    335   FinishQuerySystem(qd);
    336   UninitEnv ();
    337 }
    338 
    339 int load_database(char *mgdir, char *gensuffix, char *textsuffix)
    340 {
    341   if ((db_loaded_name[0]) != '\0' && (strcmp(gensuffix,db_loaded_name) != 0)) {
    342     mgq_finish();
    343     db_loaded_name[0] = '\0';
    344   }
    345   if ((db_loaded_name[0] == '\0') && mgq_init(mgdir, gensuffix, textsuffix)) {
    346     strcpy(db_loaded_name, gensuffix);
    347   }
    348   return (db_loaded_name[0] != '\0');
    349 }
     526int is_dbcache_full () {
     527  init_dbcache ();
     528  if (cache_numloaded >= MAXNUMDATABASEINFO) return 1;
     529  return 0;
     530}
     531
     532int load_database (char *collection, char *mgdir,
     533           char *gensuffix, char *textsuffix) {
     534  int i;
     535  query_data *qd = NULL;
     536  FILE *deb = NULL;
     537  init_dbcache ();
     538
     539  /* print out some debug information */
     540/*  deb = fopen ("/home/rjmcnab/gsdl/etc/deb.txt", "a");
     541  fprintf (deb, "\ncache_nextaccessnum: %i\n", cache_nextaccessnum);
     542  fprintf (deb, "cache_numloaded: %i\n", cache_numloaded);
     543  fprintf (deb, "cur_cachenum: %i\n", cur_cachenum);
     544  fprintf (deb, "MAXNUMDATABASEINFO: %i\n\n", MAXNUMDATABASEINFO);
     545  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     546    fprintf (deb, "Entry %i\n", i);
     547    fprintf (deb, "  accessnum: %i\n", dbcache[i].accessnum);
     548    fprintf (deb, "  collection: %s\n", dbcache[i].collection);
     549    fprintf (deb, "  mgdir: %s\n", dbcache[i].mgdir);
     550    fprintf (deb, "  gensuffix: %s\n", dbcache[i].gensuffix);
     551    fprintf (deb, "  textsuffix: %s\n", dbcache[i].textsuffix);
     552    fprintf (deb, "  qd: %x\n", (int)(dbcache[i].qd));
     553  }
     554  fclose (deb); */
     555
     556  /* search for the index */
     557  i = search_gensuffix (gensuffix);
     558  if (i > 0) {
     559    make_current (i);
     560    return 1;
     561  }
     562
     563  /* if there was a current database then the */
     564  /* environment needs uninitialising */
     565  make_current (-1);
     566
     567  /* get a free cache number */
     568  i = get_free_dbcache ();
     569  unload_database (i);
     570
     571  /* load the index */
     572  qd = InitQuerySystem (mgdir, gensuffix, textsuffix, NULL);
     573  if (qd == NULL) return 0;
     574
     575  /* cache this index */
     576  cache_database (i, collection, mgdir, gensuffix, textsuffix, qd);
     577
     578  /* make this index current */
     579  make_current (i);
     580
     581  return 1;
     582}
     583
     584/* load_text_database tries to make a level 2 index of the */
     585/* specified collection current */
     586int load_text_database (char *collection) {
     587  int i;
     588  init_dbcache ();
     589
     590  /* search for the index */
     591  i = search_doc_collect (collection);
     592
     593  /* return if none were found */
     594  if (i < 0) return 0;
     595 
     596  /* make this index current */
     597  make_current (i);
     598  return 1;
     599}
     600
     601void close_all_databases () {
     602  int i;
     603  init_dbcache ();
     604 
     605  /* unload all active databases */
     606  for (i=0; i<MAXNUMDATABASEINFO; i++) {
     607    unload_database (i);
     608  }
     609
     610  /* if there was a current database then the */
     611  /* environment needs uninitialising */
     612  make_current (-1);
     613}
  • trunk/gsdl/src/library/mgq.h

    r4 r99  
    1414int mgq_numdocs(void);
    1515int mgq_numterms(void);
    16 int load_database(char *mgdir, char *gensuffix, char *textsuffix);
    1716
    18 /* this will equal "" when a database is not loaded */
    19 /* if a database is loaded it will equal its gensuffix */
    20 extern char db_loaded_name[256];
     17
     18int is_dbcache_full ();
     19int load_database (char *collection, char *mgdir, char *gensuffix, char *textsuffix);
     20int load_text_database (char *collection);
     21void close_all_databases ();
    2122
    2223
  • trunk/gsdl/src/library/mgsearch.cpp

    r96 r99  
    139139}
    140140
     141bool isdoclevelindex (const text_t &realindex) {
     142  char *docstr = "document";
     143  text_t::const_iterator here = realindex.begin ();
     144  text_t::const_iterator end = realindex.end ();
     145
     146  while (here != end) {
     147    if (*docstr == '\0') return true;
     148    if (*docstr != (char)(*here)) return false;
     149    docstr++;
     150    here++;
     151  }
     152
     153  return false;
     154}
     155
     156text_t getdoclevelindex (const text_tarray &indexmap) {
     157  text_tarray::const_iterator here = indexmap.begin();
     158  text_tarray::const_iterator end = indexmap.end();
     159  text_t maprealindex, mapdirindex;
     160 
     161  while (here != end) {
     162    getrealdirindex (*here, maprealindex, mapdirindex);
     163    if (isdoclevelindex (maprealindex)) return maprealindex;
     164    here++;
     165  }
     166 
     167  return "";
     168}
    141169
    142170
     
    277305
    278306
    279   // get the names of the index and text suffixes
     307  // get the names of the collection, index and text suffixes
     308  char *ccollection = queryparams.collection.getcstr();
     309  assert (ccollection != NULL);
    280310  char *idxsuffix = (getindexsuffix (queryparams.collection,
    281311                     queryparams.search_index)).getcstr();
     
    290320#endif
    291321
    292   if (load_database(ccollectdir, idxsuffix, txtsuffix))
     322  if (load_database(ccollection, ccollectdir, idxsuffix, txtsuffix))
    293323    {
    294324      setsearchmode (queryparams);
     
    299329
    300330  // free up the c strings
     331  delete ccollection;
    301332  delete idxsuffix;
    302333  delete txtsuffix;
     
    441472                      text_t &output)
    442473{
    443   bool databaseloaded = true;
     474  int databaseloaded = 0;
    444475
    445476  output.clear();
    446477
    447 
    448   // make sure index is level 2
    449 
    450   ////// this changed with new naming scheme in new building software
    451   /////  i.e paragraph level index no longer contain number '3' but begin
    452   /////  with letter 'p'
    453 
    454   text_t db_loaded = db_loaded_name;
    455 
    456   if (!db_loaded.empty()) {
    457       text_t::const_iterator here = db_loaded.begin();
    458       text_t::const_iterator end = db_loaded.end();
    459  
    460      
    461       //while (here != end) {
    462       //  if (*here == '3')
    463       //    databaseloaded = false;
    464       //  here ++;
    465       //}
    466 
    467       char separator = '/';
    468       text_t db;
    469       int found = 0;
    470 #ifdef __WIN32__
    471       separator = '\\';
    472 #endif;
    473       // strip away path to db and following collection name
    474       end --;
    475       while (end != here) {
    476           if (*end == separator) {
    477               if (found) break;
    478               else {db.clear(); found = 1; end--; continue;}
    479           }
    480           db.push_back(*end);
    481           end --;
    482       }
    483 
    484       // string will have been reversed above so see if last
    485       // character is 'p'
    486       if (db[db.size()-1] == 'p') databaseloaded = false;
    487   }
    488 
    489   // find out if the database is already loaded
    490   // this is needed because a different index (but valid one)
    491   // might be already loaded.
    492   // this comparison is needed because 'load_database'
    493   // is now more oriented towards indexes
    494   if (databaseloaded == true) {
    495       text_t::const_iterator here = collection.begin();
    496       text_t::const_iterator end = collection.end();
    497       char *dbhere = &db_loaded_name[strlen(db_loaded_name) - collection.size()]; // assumes collection shorter than db_loaded_name
    498       while (here != end)
    499       {
    500           if (*here != *dbhere)
    501           {
    502               databaseloaded = false;
    503               break;
    504           }
    505           here++;
    506           dbhere++;
    507       }
    508   }
     478  char *ccollection = collection.getcstr();
     479  assert (ccollection != NULL);
     480
     481  // see if we can make an appropriate database current
     482  databaseloaded = load_text_database (ccollection);
    509483
    510484  // try and load the database
     
    524498#endif
    525499     
    526       if (load_database(ccollectdir, idxsuffix, txtsuffix))
    527     databaseloaded = true;
    528       else
    529     databaseloaded = false;
     500      databaseloaded = load_database(ccollection, ccollectdir, idxsuffix, txtsuffix);
    530501     
    531502      // free up the c strings
     
    534505      delete ccollectdir;
    535506    }
     507
     508  // free up the c collection string
     509  delete ccollection;
    536510 
    537511  if (databaseloaded)
  • trunk/gsdl/src/library/mgsearch.h

    r96 r99  
    1919text_t real2dirindex (const text_tarray &indexmap, const text_t &realindex);
    2020text_t real2macroindex (const text_t &realindex);
     21bool isdoclevelindex (const text_t &realindex);
     22text_t getdoclevelindex (const text_tarray &indexmap);
    2123
    2224class mgsearchclass {
Note: See TracChangeset for help on using the changeset viewer.