Changeset 1122


Ignore:
Timestamp:
2000-04-18T15:56:59+12:00 (24 years ago)
Author:
kjm18
Message:

added some comments

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/invf.h

    r925 r1122  
    3131
    3232// NOTE: This does not include the magic number
     33// header info for .invf.dict file
    3334struct invf_dict_header {
    3435  unsigned long lookback;
     
    9192// this version of the blocked dictionary uses a fixed number
    9293// of entries per block, not a fixed block size
     94// info for .invf.dict.blocked file
     95// blocked dict has a heap of blocks, some for words, some for tags
     96// and an index into each set of blocks. The index has pointers to
     97// the first entry in each block. Can do a binary search on the index
     98// to find out which block an element is in
    9399struct block_dict_header : public invf_dict_header {
    94100  // note: word_dict_start and tag_dict_start are undefined
    95101  // for blocked dictionaries
    96102
    97   unsigned long entries_per_wblk;
     103  unsigned long entries_per_wblk; // word blocks
    98104  unsigned long num_wblks;
    99105  unsigned long max_wblk_size;
     
    101107  unsigned long wblk_idx_start;
    102108 
    103   unsigned long entries_per_tblk;
     109  unsigned long entries_per_tblk; // tag blocks
    104110  unsigned long num_tblks;
    105111  unsigned long max_tblk_size;
     
    117123struct block_dict_el {
    118124  UCArray el; // word or tag
    119   unsigned long frag_occur; // # entries in invf file
    120   unsigned long freq;
    121   unsigned long invf_ptr;
     125  unsigned long frag_occur; // # entries in invf file - if have a
     126  // word level index, this is the same as freq, otherwise, it's the number
     127  // of fragments containing this word
     128  unsigned long freq; // # of times this word occurs
     129  unsigned long invf_ptr; // pointer into inverted file
    122130
    123131  virtual void Clear ();
     
    133141
    134142struct word_block_dict_el : public block_dict_el {
    135   unsigned long *levelFreqs;
     143  unsigned long *levelFreqs; // freq of the word at each level
    136144
    137145  void Clear ();
     
    209217#define SKIP_MODE_NO_SKIPS 0
    210218
     219// invf file - has a list of frags for each word, but the word is not
     220//  stored in the invf file - the dictionaries store the words, along
     221// with num entries, and a pointer into invf file
    211222struct invf_file_header {
    212223  unsigned long no_of_words;
Note: See TracChangeset for help on using the changeset viewer.