Changeset 927


Ignore:
Timestamp:
2000-02-16T11:45:22+13:00 (24 years ago)
Author:
kjm18
Message:

added feature to retrieve doc nums at a different level than the level
queried at. eg query at Document level, but retrieve section level docnums
bug in mg_perf_hash_build.cpp fixed

Location:
trunk/gsdl/src/mgpp/text
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/FragLevelConvert.cpp

    r855 r927  
    8686}
    8787
     88
     89bool FragLevelConvert::LevelToLevel (FragLevelConvert sectionLevelConvert,
     90         unsigned long levelNum, unsigned long &DocNum) {
     91 
     92  if (levelNum==1) {
     93    DocNum=1;
     94    return true;
     95  }
     96  unsigned long levelfragnum = sectionLevelConvert.levelStarts[levelNum-1]+1;
     97
     98  if (FragToLevel(levelfragnum, DocNum)) {
     99    return true;
     100  }
     101  return false;
     102 
     103
     104}
     105
     106
     107
  • trunk/gsdl/src/mgpp/text/FragLevelConvert.h

    r855 r927  
    2929
    3030
     31/*Notes about levelStarts (kjm18) - this is my interpretation of
     32what's going on; feel free to correct this if it's wrong.
     33
     34say you have 5 docs, consisting of fragments:
     351:  1-55
     362: 56-83
     373: 84-106
     384: 107-152
     395: 153-211
     40
     41then levelStarts would have the values
     420  1  2   3   4   5
     43?  55 83 106 152 211
     44I don't know what's in position 0.
     45
     46*/
    3147class FragLevelConvert {
    3248protected:
     
    4359
    4460  bool FragToLevel (unsigned long fragNum, unsigned long &levelDocNum) const;
     61
     62  // This function converts from one level number to another.
     63  // The FragLevelConvert that's calling the method should be loaded with
     64  // the level that you want to convert to, while sectionLevelConvert
     65  // should be loaded with the level being converted from.
     66  // levelNum holds the number at the old level; DocNum is returned with
     67  // the corresponding number at the new level.
     68  bool LevelToLevel (FragLevelConvert sectionLevelConvert,
     69           unsigned long levelNum, unsigned long &DocNum);
    4570};
    4671
  • trunk/gsdl/src/mgpp/text/MGQuery.cpp

    r879 r927  
    646646}
    647647
     648// new MGQuery to retrieve doc and section nums
     649// this will return doc nums for the level queried at (set in queryInfo)
     650// in ExtQueryResult.docs and, if a second level is specified,
     651// it will return corresponding docnums for that level in ExtQueryResult.levels
     652void MGQuery (IndexData &indexData,
     653          const QueryInfo &queryInfo,
     654          const QueryNode *queryTree,
     655          ExtQueryResult &realresult, UCArray &level) {
     656  realresult.Clear ();
     657  QueryResult result; // temp result
     658
     659  // do the normal query
     660  MGQuery (indexData, queryInfo, queryTree, result);
     661
     662  // now that we have the final result, convert it to ExtQueryResult,
     663  // add in level nums if needed
     664
     665  realresult.docs = result.docs;
     666  realresult.ranks = result.ranks;
     667  realresult.termFreqs = result.termFreqs;
     668
     669  if (queryInfo.docLevel == level || level.empty()) {
     670    realresult.levels = result.docs;
     671    return;
     672  }
     673
     674  // else need to convert from queryInfo.docLevel to level
     675
     676  // the original level info
     677  FragLevelConvert sectionLevelConverter = indexData.levelConverter;
     678
     679  // the new level info
     680  indexData.LoadLevel(level);
     681 
     682  unsigned long DocNum = 0;
     683 
     684  for (unsigned long i=0; i<realresult.docs.size(); i++) {
     685
     686    // TODO: should the bool returned by LevelToLevel be checked here? It is currently ignored.
     687    indexData.levelConverter.LevelToLevel(sectionLevelConverter, realresult.docs[i], DocNum);
     688    realresult.levels.push_back(DocNum);
     689  }
     690 
     691}
     692
     693
     694
     695
  • trunk/gsdl/src/mgpp/text/MGQuery.h

    r860 r927  
    114114#define NO_TERM_RANGE_END   (LONG_MAX/2)
    115115
     116/* NOTE: range stuff - the range values are for the previous term relative
     117to the current term. So if searching for the phrase "the cat", 'the' doesn't
     118need range limits, but 'cat' has a range of -2 to -1, i.e. if we have found 'cat'
     119then 'the' has to be at position between -2 and -1 relative to 'cat'.
     120"the cat" could also be searched for by 'cat' with no range limits, then 'the' with range 0 to 1.
     121 */
    116122class TermNode {
    117123public:
     
    157163          QueryResult &result);
    158164
     165// this function is for retrieving results with both section doc nums
     166// and Document docnums
     167void MGQuery (IndexData &indexData,
     168          const QueryInfo &queryInfo,
     169          const QueryNode *queryTree,
     170          ExtQueryResult &result, UCArray &level);
     171
    159172
    160173#endif
  • trunk/gsdl/src/mgpp/text/Terms.cpp

    r860 r927  
    7171
    7272
     73
    7374ostream &operator<< (ostream &s, const QueryResult &r) {
    7475  s << "docs: ";
     
    9697}
    9798
    98 
     99//---------------------------------------------------
     100// new ExtQueryResult stuff
     101void ExtQueryResult::Clear () {
     102  docs.erase (docs.begin(), docs.end());
     103  levels.erase (levels.begin(), levels.end());
     104  ranks.erase (ranks.begin(), ranks.end());
     105  termFreqs.erase (termFreqs.begin(), termFreqs.end());
     106}
     107
     108ExtQueryResult::ExtQueryResult () {
     109  Clear ();
     110}
     111
     112ostream &operator<< (ostream &s, const ExtQueryResult &r) {
     113  s << "docs: ";
     114  unsigned long i;
     115  for (i=0; i<r.docs.size(); i++)
     116    s << r.docs[i] << ", ";
     117
     118  s << "\nlevels: ";
     119  for (i=0; i<r.levels.size(); i++)
     120    s << r.levels[i] << ", ";
     121
     122 
     123  s << "\nranks: ";
     124  for (i=0; i<r.ranks.size(); i++)
     125    s << r.ranks[i] << ", ";
     126
     127  s << "\ntermFreqs: ";
     128  for (i=0; i<r.termFreqs.size(); i++)
     129    s << r.termFreqs[i] << ", ";
     130  s << "\n\n";
     131
     132  return s;
     133}
     134
     135
     136bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2) {
     137  return ((r1.docs == r2.docs) &&
     138      (r1.levels == r2.levels) &&
     139      (r1.ranks == r2.ranks) &&
     140      (r1.termFreqs == r2.termFreqs));
     141}
     142
     143//--------------------------------------
    99144void FragData::Clear () {
    100145  matchDocs = 0;
  • trunk/gsdl/src/mgpp/text/Terms.h

    r855 r927  
    161161                QueryResult &result);
    162162
     163
     164// new QueryResult class to handle retrieval of doc and level nums.
     165// Use this class with extended version of MGQuery
     166
     167class ExtQueryResult : public QueryResult {
     168public:
     169  DocNumArray levels; // used for returning a different granularity, eg
     170  // search sections but return Document numbers, or search Documents,
     171  // return Section numbers.
     172 
     173  void Clear ();
     174  ExtQueryResult ();
     175};
     176
     177ostream &operator<< (ostream &s, const ExtQueryResult &r);
     178bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
     179
    163180#endif
    164181
  • trunk/gsdl/src/mgpp/text/mg_perf_hash_build.cpp

    r856 r927  
    3737/*
    3838   $Log$
     39   Revision 1.2  2000/02/15 22:45:22  kjm18
     40   added feature to retrieve doc nums at a different level than the level
     41   queried at. eg query at Document level, but retrieve section level docnums
     42   bug in mg_perf_hash_build.cpp fixed
     43
    3944   Revision 1.1  2000/01/14 02:26:19  sjboddie
    4045   Rodgers new C++ mg
     
    8893  dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
    8994            MAGIC_STEM_BUILD, MG_ABORT);
     95  if (dictFile==NULL) {
     96    FatalError(1, "unable to open file");
     97  }
    9098  idh.Read (dictFile);
    9199
     100  //cerr << idh.lookback<<" "<<idh.word_dict_start<<endl;
    92101  // go to the start of the word dictionary
    93102  fseek (dictFile, idh.word_dict_start, SEEK_SET);
     
    99108  if (!(starts = (u_char **) Xmalloc (sizeof (u_char *) * idh.word_dict_size)))
    100109    FatalError (1, "Out of memory");
    101 
     110  //cerr << "size= "<< idh.word_dict_size<<endl;
    102111  word_dict_el wordEl;
    103112  wordEl.SetNumLevels (idh.num_levels);
     
    117126    *pool++ = wordEl.el.size();
    118127    bcopy ((char *) wordEl.el.begin(), (char *) pool, wordEl.el.size());
    119     pool += l;
     128    //cerr << pool<<"   " <<starts[i]<<endl;
     129    pool += wordEl.el.size();
    120130    pool_left -= l;
     131   
    121132  }
    122133  fclose (dictFile);
    123 
     134  //cerr << pool<<"   " <<starts[i-1]<<endl;
     135  //cerr<<"starts  "<<starts[113529]<<endl;
     136  //cerr << starts[17][1] << " "<<starts[25][4]<<endl;
    124137  // create perfect hash file
    125138  hashFile = create_file (filename, INVF_DICT_HASH_SUFFIX, "wb",
Note: See TracChangeset for help on using the changeset viewer.