Changeset 1898 for trunk/gsdl/src/mgpp/text/mg_invf_dump.cpp
- Timestamp: 2001-02-02T14:12:29+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/mg_invf_dump.cpp
r856 r1898 37 37 /* 38 38 $Log$ 39 Revision 1.2 2001/02/02 01:12:29 kjm18 40 added more command line options, and better help message 41 39 42 Revision 1.1 2000/01/14 02:26:17 sjboddie 40 43 Rodgers new C++ mg … … 72 75 invf_file_header &ifh, 73 76 word_dict_el &wordEl, 74 unsigned long wordStart) { 77 unsigned long wordStart, 78 bool printFrags) { 75 79 cout << wordEl.frag_occur << " \"" << wordEl.el << "\"\n"; 76 80 77 // seek to the appropriate place in the inverted file 78 fseek (invfFile, wordStart, SEEK_SET); 79 80 stdio_bitio_buffer buffer(invfFile); 81 82 unsigned long B = BIO_Bblock_Init (idh.num_frags, wordEl.frag_occur); 83 unsigned long fragNum = 0; 84 unsigned long i; 85 for (i=0; i<wordEl.frag_occur; i++) { 86 unsigned long delta = buffer.bblock_decode (B, NULL); 87 fragNum += delta; 88 cout << " " << fragNum; 89 90 if (!ifh.word_level_index ) { 91 unsigned long count = buffer.gamma_decode (NULL); 92 cout << "(" << count << ")"; 93 } else { 94 cout << "(1)"; 95 } 96 } 97 98 cout << "\n"; 99 100 buffer.done(); 81 if (printFrags) { 82 // seek to the appropriate place in the inverted file 83 fseek (invfFile, wordStart, SEEK_SET); 84 85 stdio_bitio_buffer buffer(invfFile); 86 87 unsigned long B = BIO_Bblock_Init (idh.num_frags, wordEl.frag_occur); 88 unsigned long fragNum = 0; 89 unsigned long i; 90 for (i=0; i<wordEl.frag_occur; i++) { 91 unsigned long delta = buffer.bblock_decode (B, NULL); 92 fragNum += delta; 93 cout << " " << fragNum; 94 95 if (!ifh.word_level_index ) { 96 unsigned long count = buffer.gamma_decode (NULL); 97 cout << "(" << count << ")"; 98 } else { 99 cout << "(1)"; 100 } 101 } 102 103 cout << "\n"; 104 105 buffer.done(); 106 } 101 107 } 102 108 … … 105 111 invf_file_header &/*ifh*/, 106 112 dict_el &tagEl, 107 unsigned long tagStart) { 113 unsigned long tagStart, 114 bool printFrags) { 108 115 cout << tagEl.frag_occur << " \"<" << tagEl.el << ">\"\n"; 109 116 110 // seek to the appropriate place in the inverted file 111 fseek 
(invfFile, tagStart, SEEK_SET); 112 113 stdio_bitio_buffer buffer(invfFile); 114 115 unsigned long pTag = tagEl.frag_occur*2; 116 unsigned long B = BIO_Bblock_Init (idh.num_frags+pTag, pTag); 117 unsigned long fragNum = 0; 118 unsigned long i; 119 for (i=0; i<tagEl.frag_occur; i++) { 120 unsigned long delta = buffer.bblock_decode (B, NULL)-1; 121 fragNum += delta; 122 cout << " " << fragNum; 123 cout << "-"; 124 delta = buffer.bblock_decode (B, NULL)-1; 125 fragNum += delta; 126 cout << fragNum; 127 } 128 129 cout << "\n"; 130 131 buffer.done(); 117 if (printFrags) { 118 // seek to the appropriate place in the inverted file 119 fseek (invfFile, tagStart, SEEK_SET); 120 121 stdio_bitio_buffer buffer(invfFile); 122 123 unsigned long pTag = tagEl.frag_occur*2; 124 unsigned long B = BIO_Bblock_Init (idh.num_frags+pTag, pTag); 125 unsigned long fragNum = 0; 126 unsigned long i; 127 for (i=0; i<tagEl.frag_occur; i++) { 128 unsigned long delta = buffer.bblock_decode (B, NULL)-1; 129 fragNum += delta; 130 cout << " " << fragNum; 131 cout << "-"; 132 delta = buffer.bblock_decode (B, NULL)-1; 133 fragNum += delta; 134 cout << fragNum; 135 } 136 137 cout << "\n"; 138 139 buffer.done(); 140 } 132 141 } 133 142 … … 148 157 149 158 150 static void process_files (char *filename) { 159 static void process_files (char *filename, 160 bool printHeader, 161 bool printWords, 162 bool printTags, 163 bool printFrags) { 151 164 // open the dictionary 152 165 FILE *dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb", … … 166 179 167 180 // print out header information 168 PrintHeaderInfo (idh, ifh); 169 181 if (printHeader) { 182 PrintHeaderInfo (idh, ifh); 183 } 184 170 185 // open the inverted index 171 186 FILE *invfIdxFile = open_file (filename, INVF_IDX_SUFFIX, "rb", … … 176 191 177 192 // process all the words 178 unsigned long wordNum; 179 unsigned long wordStart; 180 word_dict_el wordEl; 181 wordEl.SetNumLevels (idh.num_levels); 182 for (wordNum=0; wordNum<idh.word_dict_size; 
wordNum++) { 183 wordEl.Read (dictFile, idh.num_levels); 184 ReadUL (invfIdxFile, wordStart); 185 PrintInvfWord (invfFile, idh, ifh, wordEl, wordStart); 193 if (printWords) { 194 unsigned long wordNum; 195 unsigned long wordStart; 196 word_dict_el wordEl; 197 wordEl.SetNumLevels (idh.num_levels); 198 for (wordNum=0; wordNum<idh.word_dict_size; wordNum++) { 199 wordEl.Read (dictFile, idh.num_levels); 200 ReadUL (invfIdxFile, wordStart); 201 PrintInvfWord (invfFile, idh, ifh, wordEl, wordStart, printFrags); 202 } 186 203 } 187 204 188 205 // process all the tags 189 unsigned long tagNum; 190 unsigned long tagStart; 191 dict_el tagEl; 192 for (tagNum=0; tagNum<idh.tag_dict_size; tagNum++) { 193 tagEl.Read (dictFile); 194 ReadUL (invfIdxFile, tagStart); 195 PrintInvfTag (invfFile, idh, ifh, tagEl, tagStart); 196 } 197 206 if (printTags) { 207 unsigned long tagNum; 208 unsigned long tagStart; 209 dict_el tagEl; 210 for (tagNum=0; tagNum<idh.tag_dict_size; tagNum++) { 211 tagEl.Read (dictFile); 212 ReadUL (invfIdxFile, tagStart); 213 PrintInvfTag (invfFile, idh, ifh, tagEl, tagStart, printFrags); 214 } 215 } 198 216 // close the open files 199 217 fclose (invfIdxFile); … … 209 227 dir_name = getenv ("MGDATA"); 210 228 opterr = 0; 229 230 bool printHeader = false; 231 bool printWords = false; 232 bool printTags = false; 233 bool printFrags = false; 211 234 212 235 msg_prefix = argv[0]; 213 while ((ch = getopt (argc, argv, "h bwtf:d:")) != -1) {236 while ((ch = getopt (argc, argv, "hrwtnf:d:")) != -1) { 214 237 switch (ch) { 215 238 case 'f': // input file … … 219 242 set_basepath(optarg); 220 243 break; 244 case 'r': 245 printHeader = true; 246 break; 247 case 'w': 248 printWords = true; 249 break; 250 case 'n': 251 printFrags = true; 252 break; 253 case 't': 254 printTags = true; 255 break; 221 256 case 'h': 222 257 case '?': 223 fprintf (stderr, "usage: %s [-h] [-b] [-w] [-t] [-f input_file]" 224 "[-d data directory]\n", argv[0]); 258 fprintf (stderr, "usage: %s [-h] 
[-r] [-w] [-t] [-n] [-f input_file]" 259 "[-d data directory]\n(-rwnt:print header, words, tags, fragnums)\n", 260 argv[0]); 225 261 exit (1); 226 262 } 227 263 } 228 264 229 process_files (filename );265 process_files (filename, printHeader, printWords, printTags, printFrags); 230 266 231 267 return 0;
Note: See TracChangeset for help on using the changeset viewer.