source: main/trunk/greenstone2/common-src/indexers/mgpp/text/FText.cpp@ 25147

Last change on this file since 25147 was 25147, checked in by kjdon, 12 years ago

merged 64_bit_Greenstone branch into trunk, rev 25139

  • Property svn:keywords set to Author Date Id Revision
File size: 5.7 KB
Line 
1/**************************************************************************
2 *
3 * FText.cpp -- File structures for text compression
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "FText.h"
23#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
24
25
26BitAddr::BitAddr () {
27 Clear ();
28}
29
30void BitAddr::Clear () {
31 byte = 0;
32 bit = 0;
33}
34
35bool BitAddr::Read (FILE *f) {
36 return (ReadUL (f, byte) && ReadUC (f, bit));
37}
38
39bool BitAddr::Write (FILE *f) const {
40 return (WriteUL (f, byte) && WriteUC (f, bit));
41}
42
// Combined size of one unsigned char plus one mg_u_long.
// NOTE(review): presumably the on-disk size of a BitAddr as produced by
// BitAddr::Write (one UL followed by one UC) -- confirm against netorder.h.
#define BITADDRLEN (sizeof(unsigned char) + sizeof(mg_u_long))
44
45
46TextLevelInfo::TextLevelInfo () {
47 Clear();
48}
49
50void TextLevelInfo::Clear () {
51 levelTag.erase (levelTag.begin(), levelTag.end());
52 textIdxPtr = 0;
53 numEntries = 0;
54}
55
56bool TextLevelInfo::Read (FILE *f) {
57 return (ReadUCArray (f, levelTag) &&
58 ReadUL (f, textIdxPtr) &&
59 ReadUL (f, numEntries));
60}
61
62bool TextLevelInfo::Write (FILE *f) const {
63 return (WriteUCArray (f, levelTag) &&
64 WriteUL (f, textIdxPtr) &&
65 WriteUL (f, numEntries));
66}
67
68ostream &operator<<(ostream &s, const TextLevelInfo &l) {
69 s << " Tag: \"" << l.levelTag << "\"\n"
70 << " textIdxPtr: " << l.textIdxPtr << "\n"
71 << " numEntries: " << l.numEntries << "\n\n";
72
73 return s;
74}
75
76
77FTextLevel::FTextLevel () {
78 Clear();
79}
80
81void FTextLevel::Clear () {
82 levelInfo.erase (levelInfo.begin(), levelInfo.end());
83}
84
85bool FTextLevel::Read (FILE *f) {
86 // read in the array size
87 mg_u_long arrSize = 0;
88 if (!ReadVarLenUL (f, arrSize)) return false;
89
90 TextLevelInfo thisLevel;
91 while (arrSize > 0) {
92 // read and insert the next level information
93 if (!thisLevel.Read (f)) return false;
94 levelInfo[thisLevel.levelTag] = thisLevel;
95
96 --arrSize;
97 }
98
99 return true;
100}
101
102bool FTextLevel::Write (FILE *f) const {
103 // write out the array size
104 if (!WriteVarLenUL (f, levelInfo.size())) return false;
105
106 // write out each level info
107 TextLevelInfoMap::const_iterator here = levelInfo.begin();
108 TextLevelInfoMap::const_iterator end = levelInfo.end();
109 while (here != end) {
110 if (!(*here).second.Write (f)) return false;
111
112 ++here;
113 }
114
115 return true;
116}
117
118ostream &operator<<(ostream &s, const FTextLevel &l) {
119 TextLevelInfoMap::const_iterator here = l.levelInfo.begin ();
120 TextLevelInfoMap::const_iterator end = l.levelInfo.end ();
121 while (here != end) {
122 s << (*here).second;
123 ++here;
124 }
125
126 return s;
127}
128
129
130TextIdx::TextIdx () {
131 Clear ();
132}
133
134void TextIdx::Clear () {
135 start.Clear();
136 end.Clear();
137 which = 0;
138}
139
// Size of two BITADDRLEN-sized addresses plus one char.
// NOTE(review): presumably the on-disk size of one TextIdx as produced by
// TextIdx::Write (start, end, which); SeekTextIdx uses it as the record
// stride -- confirm against the I/O helpers.
#define TEXTIDXLEN (sizeof(char) + BITADDRLEN*2)
141
142bool TextIdx::Read (FILE *f, const TextLevelInfo &levelInfo,
143 mg_u_long docNum) {
144 if (!SeekTextIdx (f, levelInfo, docNum)) return false;
145 return Read (f);
146}
147
148bool TextIdx::Read (FILE *f) {
149 return (start.Read (f) && end.Read (f) && ReadUC (f, which));
150}
151
152bool TextIdx::Write (FILE *f) const {
153 return (start.Write (f) && end.Write (f) && WriteUC (f, which));
154}
155
156ostream &operator<<(ostream &s, const TextIdx &t) {
157 s << "start byte: " << t.start.byte << "\n";
158 s << "start bit: " << (int)t.start.bit << "\n";
159 s << "end byte: " << t.end.byte << "\n";
160 s << "end bit: " << (int)t.end.bit << "\n";
161 s << "which: " << (int)t.which << "\n\n";
162
163 return s;
164}
165
166
167bool SeekTextIdx (FILE *f, const TextLevelInfo &levelInfo,
168 mg_u_long docNum) {
169 if (docNum == 0 || docNum > levelInfo.numEntries) return false;
170
171 mg_u_long seekPos = levelInfo.textIdxPtr + (docNum-1) * TEXTIDXLEN;
172 if (fseek (f, seekPos, SEEK_SET) != 0) return false;
173
174 return true;
175}
176
177
178bool ReadTextIdxArray (FILE *f, TextIdxArray &a, mg_u_long arrSize) {
179 // clear the array
180 a.erase (a.begin(), a.end());
181
182 // read in each element
183 TextIdx idx;
184 while (arrSize > 0) {
185 if (!idx.Read(f)) return false;
186 a.push_back (idx);
187 --arrSize;
188 }
189
190 return true;
191}
192
193bool WriteTextIdxArray (FILE *f, const TextIdxArray &a) {
194 TextIdxArray::const_iterator here = a.begin();
195 TextIdxArray::const_iterator end = a.end();
196 while (here != end) {
197 if (!(*here).Write(f)) return false;
198 ++here;
199 }
200
201 return true;
202}
203
204
205CompressTextInfo::CompressTextInfo () {
206 Clear();
207}
208
209void CompressTextInfo::Clear () {
210 ResetStart();
211 docPtrs.erase (docPtrs.begin(), docPtrs.end());
212}
213
214void CompressTextInfo::ResetStart () {
215 inDoc = false;
216 start.Clear();
217 which = 0;
218}
219
220void CompressTextInfo::SetStart (mg_u_long startPos,
221 unsigned char startBit,
222 unsigned char startWhich) {
223 // place an imaginary end tag if needed
224 // note: the document tag always needs a closing tag
225 if (inDoc) SetEnd (startPos, startBit);
226
227 // remember start of level
228 inDoc = true;
229 start.byte = startPos;
230 start.bit = startBit;
231 which = startWhich;
232}
233
234void CompressTextInfo::SetEnd (mg_u_long endPos,
235 unsigned char endBit) {
236 if (inDoc) {
237 // add completed entry to list of ptrs
238 TextIdx textIdx;
239 textIdx.start = start;
240 textIdx.end.byte = endPos;
241 textIdx.end.bit = endBit;
242 textIdx.which = which;
243 docPtrs.push_back (textIdx);
244
245 // reset this entry
246 ResetStart();
247 }
248}
249
Note: See TracBrowser for help on using the repository browser.