source: gsdl/trunk/trunk/mgpp/text/TextEl.cpp@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago

Undoing change committed in r16582

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1/**************************************************************************
2 *
3 * TextEl.cpp -- Data structures for parsed documents
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "TextEl.h"
23#include "ctype.h"
24
25#define REC_TERM '\002'
26#define PARA_TERM '\003'
27
28
29void TextEl::Clear () {
30 elType = TextE;
31 tagName.erase (tagName.begin(), tagName.end());
32 text.erase (text.begin(), text.end());
33}
34
35
36static bool ReadTextTag (FileBuf &buf, TextEl &el) {
37 // we must have some sort of tag
38 unsigned char c;
39 if (!buf.Peek(c)) return false;
40
41 // get the '<'
42 if (c != '<') return false;
43 el.text.push_back (c);
44 if (!buf.Next() || !buf.Peek (c)) return false;
45
46 // see if we have a '/'
47 if (c == '/') {
48 // we have a closing tag
49 el.text.push_back (c);
50 if (!buf.Next() || !buf.Peek (c)) return false;
51 el.elType = CloseTagE;
52 } else {
53 // we have an opening tag
54 el.elType = OpenTagE;
55 }
56
57 // get the tag name
58 while (isalnum(c)) {
59 el.text.push_back (c);
60 el.tagName.push_back (c);
61 if (!buf.Next() || !buf.Peek (c)) return false;
62 }
63
64 // get everything up to the closing '>'
65 while (c != '>') {
66 el.text.push_back (c);
67 if (!buf.Next() || !buf.Peek (c)) return false;
68 }
69
70 // get the closing '>'
71 el.text.push_back (c);
72 buf.Next();
73
74 // if there is no tag name then make this a text element
75 if (el.tagName.empty()) el.elType = TextE;
76
77 return true;
78}
79
80static bool ReadTextTag (u_char ** buffer, TextEl &el) {
81 // we must have some sort of tag
82 u_char * buffer_pos = *buffer;
83 if (*buffer_pos == '\0' || *buffer_pos != '<') {
84 return false;
85 }
86 el.text.push_back('<');
87 buffer_pos ++;
88 if (*buffer_pos == '\0') {
89 *buffer = buffer_pos;
90 return false;
91 }
92 // see if we have a '/'
93 if (*buffer_pos == '/') {
94 el.text.push_back('/');
95 el.elType = CloseTagE;
96 buffer_pos++;
97 if (*buffer_pos == '\0') {
98 *buffer = buffer_pos;
99 return false;
100 }
101 } else {
102 el.elType = OpenTagE;
103 }
104 // get the tag name
105 while (isalnum(*buffer_pos)) {
106 el.text.push_back (*buffer_pos);
107 el.tagName.push_back (*buffer_pos);
108 buffer_pos++;
109
110 if (*buffer_pos == '\0') {
111 *buffer = buffer_pos;
112 return false;
113 }
114
115 }
116 cout << "tag name = "<<el.tagName<<endl;
117 // get everything up to closing '>'
118 while (*buffer_pos != '>') {
119 el.text.push_back(*buffer_pos);
120 buffer_pos++;
121 if (*buffer_pos == '\0') {
122 *buffer = buffer_pos;
123 return false;
124 }
125
126 }
127 // get teh closing '>'
128 el.text.push_back('>');
129 buffer_pos++;
130 *buffer = buffer_pos;
131 cout << "at end of readtexttag, buffer is "<<*buffer<<endl;
132 // if there is no tag name then make this a text element
133 if (el.tagName.empty()) el.elType = TextE;
134
135 return true;
136}
137
138static void ToggleParaTag (TextEl &el, bool &compatInPara) {
139 SetCStr (el.tagName, "Paragraph", 9);
140 el.text.erase (el.text.begin(), el.text.end());
141 if (compatInPara) {
142 el.elType = CloseTagE;
143 el.text.push_back (PARA_TERM);
144 } else el.elType = OpenTagE;
145 compatInPara = !compatInPara;
146}
147
148static void SetRecTag (TextEl &el, TextElType elType) {
149 el.elType = elType;
150 SetCStr (el.tagName, "Document", 8);
151 el.text.erase (el.text.begin(), el.text.end());
152 if (elType == CloseTagE)
153 el.text.push_back (REC_TERM);
154}
155
156
157bool ReadTextEl (FileBuf &buf, TextEl &el,
158 bool compatMode, bool &compatInPara) {
159 el.Clear();
160
161 unsigned char c;
162 if (!buf.Peek (c)) return false;
163
164 if (compatMode) {
165 if (c == PARA_TERM) {
166 ToggleParaTag (el, compatInPara);
167 if (!compatInPara) buf.Next();
168 return true;
169 }
170 if (c == REC_TERM) {
171 if (compatInPara) {
172 // need to close this paragraph
173 ToggleParaTag (el, compatInPara);
174 return true;
175 }
176 // can close this document
177 buf.Next();
178 SetRecTag (el, CloseTagE);
179 return true;
180 }
181 if (!compatInPara) {
182 // need to open a paragraph
183 ToggleParaTag (el, compatInPara);
184 return true;
185 }
186 }
187
188 // see if we have some sort of tag
189 if (c == '<') return ReadTextTag (buf, el);
190
191 // read in a text element
192 el.elType = TextE;
193 while (c != '<' && !(compatMode && (c == PARA_TERM || c == REC_TERM))) {
194 el.text.push_back (c);
195 if (!buf.Next() || !buf.Peek (c)) break;
196 }
197
198 return true;
199}
200bool ReadTextEl (u_char **buffer, TextEl &el,
201 bool compatMode, bool &compatInPara) {
202 el.Clear();
203 u_char * buffer_pos = *buffer;
204 if (*buffer_pos == '\0') return false;
205 if (compatMode) {
206 if (*buffer_pos == PARA_TERM) {
207 ToggleParaTag (el, compatInPara);
208 if (!compatInPara) buffer_pos++;
209 *buffer = buffer_pos;
210 return true;
211 }
212 if (*buffer_pos == REC_TERM) {
213 if (compatInPara) {
214 // need to close this paragraph
215 ToggleParaTag (el, compatInPara);
216 *buffer = buffer_pos;
217 return true;
218 }
219 // can close this document
220 buffer_pos++;
221 SetRecTag (el, CloseTagE);
222 *buffer = buffer_pos;
223 return true;
224 }
225 if (!compatInPara) {
226 // need to open a paragraph
227 ToggleParaTag (el, compatInPara);
228 *buffer = buffer_pos;
229 return true;
230 }
231 }
232
233 // see if we have some sort of tag
234 if (*buffer_pos == '<') return ReadTextTag (buffer, el);
235
236 // read in a text element
237 el.elType = TextE;
238 while (*buffer_pos != '<' && !(compatMode && (*buffer_pos == PARA_TERM || *buffer_pos == REC_TERM))) {
239 el.text.push_back (*buffer_pos);
240 buffer_pos++;
241 if (*buffer_pos == '\0') break;
242 }
243 *buffer = buffer_pos;
244 cout << "text element: "<<el.text<<endl;
245 return true;
246}
247
248static void AddTextEl (TextElArray &doc, unsigned long &docLen,
249 const TextEl &el) {
250 doc.push_back (el);
251 docLen += el.text.size();
252
253// cout << "elType: " << el.elType << "\n";
254// cout << "tagName: " << el.tagName << "\n";
255// cout << "text: \"" << el.text << "\"\n\n";
256}
257
258
259bool ReadDoc (FileBuf &buf, const UCArray &docTag,
260 TextElArray &doc, unsigned long &docLen,
261 bool compatMode) {
262 bool compatInPara = false;
263 bool foundDocEl = false;
264 TextEl el;
265
266 doc.erase (doc.begin(), doc.end());
267 docLen = 0;
268
269 if (compatMode) {
270 // add <Document><Paragraph>
271 SetRecTag (el, OpenTagE);
272 AddTextEl (doc, docLen, el);
273 ToggleParaTag (el, compatInPara);
274 AddTextEl (doc, docLen, el);
275
276 } else {
277 // look for an opening docTag
278 do {
279 if (!ReadTextEl (buf, el, compatMode, compatInPara)) return false;
280 } while (el.elType != OpenTagE || el.tagName != docTag);
281
282 AddTextEl (doc, docLen, el);
283 }
284
285
286 // get all elements until the closing docTag
287 while (ReadTextEl (buf, el, compatMode, compatInPara)) {
288 foundDocEl = true;
289 AddTextEl (doc, docLen, el);
290 if (el.elType == CloseTagE && el.tagName == docTag)
291 return true;
292 }
293
294
295 if (compatMode) {
296 if (!foundDocEl) { // end of text
297 doc.erase (doc.begin(), doc.end());
298 return false;
299 }
300
301 // if we got here then the eof was encountered before
302 // the closing document tag
303 if (compatInPara) {
304 // need to close this paragraph
305 ToggleParaTag (el, compatInPara);
306 AddTextEl (doc, docLen, el);
307 }
308 // close this document
309 SetRecTag (el, CloseTagE);
310 AddTextEl (doc, docLen, el);
311 }
312
313 return true;
314}
315
316// copy of readdoc to get TextElArray from u_char buffer
317bool ReadDoc(u_char ** buffer, const UCArray &docTag,
318 TextElArray &doc, unsigned long &docLen,
319 bool compatMode) {
320 bool compatInPara = false;
321 bool foundDocEl = false;
322 TextEl el;
323
324 doc.erase (doc.begin(), doc.end());
325 docLen = 0;
326
327 if (compatMode) {
328 // add <Document><Paragraph>
329 SetRecTag (el, OpenTagE);
330 AddTextEl (doc, docLen, el);
331 ToggleParaTag (el, compatInPara);
332 AddTextEl (doc, docLen, el);
333
334 } else {
335 // look for an opening docTag
336 do {
337 if (!ReadTextEl (buffer, el, compatMode, compatInPara)) return false;
338 } while (el.elType != OpenTagE || el.tagName != docTag);
339
340 AddTextEl (doc, docLen, el);
341 }
342
343
344 // get all elements until the closing docTag
345 while (ReadTextEl (buffer, el, compatMode, compatInPara)) {
346 foundDocEl = true;
347 AddTextEl (doc, docLen, el);
348 if (el.elType == CloseTagE && el.tagName == docTag)
349 return true;
350 }
351
352
353 if (compatMode) {
354 if (!foundDocEl) { // end of text
355 doc.erase (doc.begin(), doc.end());
356 return false;
357 }
358
359 // if we got here then the eof was encountered before
360 // the closing document tag
361 if (compatInPara) {
362 // need to close this paragraph
363 ToggleParaTag (el, compatInPara);
364 AddTextEl (doc, docLen, el);
365 }
366 // close this document
367 SetRecTag (el, CloseTagE);
368 AddTextEl (doc, docLen, el);
369 }
370
371 return true;
372}
373
374
Note: See TracBrowser for help on using the repository browser.