source: trunk/indexers/mgpp/text/FText.h@ 3365

Last change on this file since 3365 was 3365, checked in by kjdon, 22 years ago

Initial revision

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1/**************************************************************************
2 *
3 * FText.h -- File structures for text compression
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#ifndef FTEXT_H
23#define FTEXT_H
24
25#if defined(GSDL_USE_OBJECTSPACE)
26# include <ospace\std\map>
27#elif defined(GSDL_USE_STL_H)
28# include <map.h>
29#else
30# include <map>
31#endif
32
33#include <stdio.h>
34#include "UCArray.h"
35
36
// BitAddr: a position expressed as a (byte, bit) pair.  TextIdx (declared
// later in this header) stores two of these to delimit a stretch of
// compressed text.
// NOTE(review): presumably `byte` is a byte offset and `bit` the bit offset
// inside that byte; the valid range of `bit` is not visible from this
// header -- confirm against the implementation file.
37// holds a bit address
38class BitAddr {
39public:
40 unsigned long byte;
41 unsigned char bit;
42
43 BitAddr ();
// Defined outside this header; presumably resets both fields -- TODO confirm.
44 void Clear ();
45
// Read/Write take a stdio stream and report a bool; presumably true means
// success -- TODO confirm.  The on-disk layout is decided by the
// implementation, not by this declaration.
46 bool Read (FILE *f);
47 bool Write (FILE *f) const;
48};
49
50
// TextLevelInfo: per-level record made of a tag (levelTag) and two
// unsigned long values (textIdxPtr, numEntries).  It is the value type of
// TextLevelInfoMap and is passed to TextIdx::Read and SeekTextIdx together
// with a document number.
// NOTE(review): presumably textIdxPtr is the file position of this level's
// first text index record and numEntries the number of records for the
// level -- confirm against the implementation file.
51class TextLevelInfo {
52public:
53 UCArray levelTag;
54 unsigned long textIdxPtr;
55 unsigned long numEntries;
56
57 TextLevelInfo ();
// Defined outside this header; presumably resets all three fields -- TODO confirm.
58 void Clear ();
59
// Stream (de)serialisation; the bool result presumably signals success --
// TODO confirm.
60 bool Read (FILE *f);
61 bool Write (FILE *f) const;
62};
63
// Writes a TextLevelInfo to an output stream and returns a stream reference
// (presumably `s` itself so that calls can be chained -- TODO confirm).
// `ostream` is used unqualified; this header relies on an earlier include
// to make that name visible.
64// stream output for debugging purposes
65ostream &operator<<(ostream &s, const TextLevelInfo &l);
66
67
// Map from a UCArray key to a TextLevelInfo, ordered by the LTUCArray
// comparator.
// NOTE(review): the key is presumably the level tag (the same value as
// TextLevelInfo::levelTag) -- confirm against the code that fills the map.
68typedef map<UCArray, TextLevelInfo, LTUCArray> TextLevelInfoMap;
69
70
// FTextLevel: wraps a single TextLevelInfoMap (levelInfo) and gives it the
// same Clear/Read/Write interface as the other structures in this header.
71class FTextLevel {
72public:
73 TextLevelInfoMap levelInfo;
74
75 FTextLevel ();
// Defined outside this header; presumably empties levelInfo -- TODO confirm.
76 void Clear ();
77
// Stream (de)serialisation of the whole map; the bool result presumably
// signals success -- TODO confirm.
78 bool Read (FILE *f);
79 bool Write (FILE *f) const;
80};
81
// Writes an FTextLevel to an output stream and returns a stream reference
// (presumably `s` itself so that calls can be chained -- TODO confirm).
82// stream output for debugging purposes
83ostream &operator<<(ostream &s, const FTextLevel &l);
84
85
// TextIdx: one index record.  It holds a half-open bit range
// [start, end) into the compressed text plus a one-byte `which` flag.
86// the text.idx file points into the compressed text
87class TextIdx {
88public:
89 BitAddr start; // the first bit of the compressed text
90 BitAddr end; // the first bit past the end of the text
91 unsigned char which; // 0 = non-word, 1 = word
92
93 TextIdx ();
// Defined outside this header; presumably resets start, end and which --
// TODO confirm.
94 void Clear ();
95
// Random-access form: locates the record for docNum within the level
// described by levelInfo, then reads it.  Document numbers start at 1
// (see the note above SeekTextIdx).
// NOTE(review): presumably implemented via SeekTextIdx followed by
// Read(f) -- confirm against the implementation file.
96 // does a seek and reads the appropriate record
97 bool Read (FILE *f, const TextLevelInfo &levelInfo,
98 unsigned long docNum);
99
// Sequential form: these overloads take only the stream, so they
// presumably read/write one record at the stream's current position --
// TODO confirm.  The bool result presumably signals success.
100 bool Read (FILE *f);
101 bool Write (FILE *f) const;
102};
103
// Writes a TextIdx to an output stream and returns a stream reference
// (presumably `s` itself so that calls can be chained -- TODO confirm).
104// stream output for debugging purposes
105ostream &operator<<(ostream &s, const TextIdx &t);
106
107
// SeekTextIdx takes a stream, a level description and a 1-based document
// number, and returns bool.
// NOTE(review): presumably it positions `f` at the index record of docNum
// within the level described by levelInfo and returns true on success; how
// docNum == 0 or docNum > levelInfo.numEntries is handled is not visible
// here -- confirm against the implementation file.
108// note: document numbers start at 1
109
110bool SeekTextIdx (FILE *f, const TextLevelInfo &levelInfo,
111 unsigned long docNum);
112
113
// TextIdxArray is a vector of TextIdx records; CompressTextInfo::docPtrs
// has this type.  `vector` is used unqualified and this header does not
// include it directly, so it relies on an earlier include to provide it.
114// used to store the pointers while compressing the text
115typedef vector<TextIdx> TextIdxArray;
116
// Bulk stream I/O for a TextIdxArray.  The reader is told the element
// count through arrSize; the writer takes no count.
// NOTE(review): presumably the reader fills `a` with arrSize records and
// the writer emits a.size() records with no length prefix -- confirm
// against the implementation file.  The bool result presumably signals
// success.
117bool ReadTextIdxArray (FILE *f, TextIdxArray &a, unsigned long arrSize);
118bool WriteTextIdxArray (FILE *f, const TextIdxArray &a);
119
120
// CompressTextInfo: working state for one level during compression.
// inDoc, start and which describe the currently open document (if any);
// docPtrs accumulates the finished TextIdx records.
121// used to store information about a level while compressing the text
122class CompressTextInfo {
123public:
124 bool inDoc;
125 BitAddr start;
126 unsigned char which;
127 TextIdxArray docPtrs;
128
129 CompressTextInfo ();
// Defined outside this header; presumably resets every field including
// docPtrs (in contrast to ResetStart below) -- TODO confirm.
130 void Clear ();
131
132 // clears inDoc, start, and which (not the docPtrs)
133 void ResetStart ();
134
// The three arguments line up with start.byte, start.bit and which.
// NOTE(review): the exact "closing off" behaviour when a document is
// already open is not visible in this header -- confirm against the
// implementation file.
135 // set the start of a level (closing off the last opening
136 // tag if needed)
137 void SetStart (unsigned long startPos,
138 unsigned char startBit,
139 unsigned char startWhich);
140
// endPos/endBit give the end bit address of the document being closed.
// NOTE(review): the comment below implies this is a no-op when inDoc is
// false -- confirm against the implementation file.
141 // if in a document, it will set the end, add the document to the
142 // list of document ptrs, and then call ResetStart
143 void SetEnd (unsigned long endPos,
144 unsigned char endBit);
145};
146
// Map from a UCArray key to the CompressTextInfo working state, ordered by
// the LTUCArray comparator.
// NOTE(review): the key is presumably the level tag, mirroring
// TextLevelInfoMap -- confirm against the code that fills the map.
147typedef map<UCArray, CompressTextInfo, LTUCArray> CompressTextInfoMap;
148
149
150#endif
Note: See TracBrowser for help on using the repository browser.